From c45ff300ad12b8a625ed81fd5a42a0549456fd4a Mon Sep 17 00:00:00 2001 From: Bill Neubauer Date: Thu, 5 Oct 2017 15:08:59 -0700 Subject: [PATCH 01/25] Refactor versioning information for archetypes. This makes versioning consistent with the Beam POMs. The top-level POM contains the version numbers, and versions are plumbed into the archetypes via properties. --- .../resources/archetype-resources/pom.xml | 47 ++++++++++++------- .../resources/archetype-resources/pom.xml | 44 +++++++++++------ .../resources/archetype-resources/pom.xml | 11 +++-- .../projects/basic/reference/pom.xml | 11 +++-- pom.xml | 13 +++++ 5 files changed, 87 insertions(+), 39 deletions(-) diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml index f33914d476..12d2783a0f 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml @@ -27,7 +27,22 @@ UTF-8 - 2.20 + + @bigquery.version@ + @google-clients.version@ + @guava.version@ + @hamcrest.version@ + @jackson.version@ + @joda.version@ + @junit.version@ + @maven-compiler-plugin.version@ + @maven-exec-plugin.version@ + @maven-jar-plugin.version@ + @maven-shade-plugin.version@ + @mockito.version@ + @pubsub.version@ + @slf4j.version@ + @surefire-plugin.version@ @@ -49,7 +64,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.6.1 + ${maven-compiler-plugin.version} 1.8 1.8 @@ -79,7 +94,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.0.2 + ${maven-jar-plugin.version} @@ -158,7 +173,7 @@ com.google.apis google-api-services-bigquery - v2-rev295-1.22.0 + ${bigquery.version} @@ -172,7 +187,7 @@ com.google.http-client google-http-client - 1.22.0 + ${google-clients.version} @@ -186,7 +201,7 @@ com.google.apis google-api-services-pubsub - v1-rev10-1.22.0 + ${pubsub.version} @@ -200,26 +215,26 @@ joda-time joda-time - 2.4 
+ ${joda.version} com.google.guava guava - 20.0 + ${guava.version} org.slf4j slf4j-api - 1.7.14 + ${slf4j.version} org.slf4j slf4j-jdk14 - 1.7.14 + ${slf4j.version} runtime @@ -229,19 +244,19 @@ org.hamcrest hamcrest-all - 1.3 + ${hamcrest.version} junit junit - 4.12 + ${junit.version} org.mockito mockito-all - 1.9.5 + ${mockito.version} test diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index 28ae0db9fe..2c2b8d3fc6 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -27,7 +27,21 @@ UTF-8 - 2.20 + + @bigquery.version@ + @google-clients.version@ + @guava.version@ + @hamcrest.version@ + @jackson.version@ + @joda.version@ + @junit.version@ + @maven-compiler-plugin.version@ + @maven-exec-plugin.version@ + @maven-jar-plugin.version@ + @maven-shade-plugin.version@ + @pubsub.version@ + @slf4j.version@ + @surefire-plugin.version@ @@ -49,7 +63,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.6.1 + ${maven-compiler-plugin.version} ${targetPlatform} ${targetPlatform} @@ -79,7 +93,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.0.2 + ${maven-jar-plugin.version} @@ -158,7 +172,7 @@ com.google.apis google-api-services-bigquery - v2-rev295-1.22.0 + ${bigquery.version} @@ -172,7 +186,7 @@ com.google.http-client google-http-client - 1.22.0 + ${google-clients.version} @@ -186,7 +200,7 @@ com.google.apis google-api-services-pubsub - v1-rev10-1.22.0 + ${pubsub.version} @@ -200,26 +214,26 @@ joda-time joda-time - 2.4 + ${joda.version} com.google.guava guava - 20.0 + ${guava.version} org.slf4j slf4j-api - 1.7.14 + ${slf4j.version} org.slf4j slf4j-jdk14 - 1.7.14 + ${slf4j.version} runtime @@ -229,13 +243,13 @@ org.hamcrest hamcrest-all - 1.3 + ${hamcrest.version} junit junit - 4.12 + ${junit.version} diff --git 
a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml index 75eaaade81..22f717f97a 100644 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml @@ -25,6 +25,9 @@ UTF-8 + @maven-compiler-plugin.version@ + @maven-exec-plugin.version@ + @slf4j.version@ @@ -46,7 +49,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.6.1 + ${maven-compiler-plugin.version} ${targetPlatform} ${targetPlatform} @@ -59,7 +62,7 @@ org.codehaus.mojo exec-maven-plugin - 1.5.0 + ${maven-exec-plugin.version} false @@ -79,12 +82,12 @@ org.slf4j slf4j-api - 1.7.14 + ${slf4j.version} org.slf4j slf4j-jdk14 - 1.7.14 + ${slf4j.version} diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml index fc0940bf2d..8e4edbd29e 100644 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml @@ -25,6 +25,9 @@ UTF-8 + @maven-compiler-plugin.version@ + @maven-exec-plugin.version@ + @slf4j.version@ @@ -46,7 +49,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.6.1 + ${maven-compiler-plugin.version} 1.7 1.7 @@ -59,7 +62,7 @@ org.codehaus.mojo exec-maven-plugin - 1.5.0 + ${maven-exec-plugin.version} false @@ -79,12 +82,12 @@ org.slf4j slf4j-api - 1.7.14 + ${slf4j.version} org.slf4j slf4j-jdk14 - 1.7.14 + ${slf4j.version} diff --git a/pom.xml b/pom.xml index f9a662c658..a54ec1c5e9 100644 --- a/pom.xml +++ b/pom.xml @@ -110,7 +110,20 @@ 6 1 + v2-rev295-1.22.0 + 1.22.0 + 20.0 + 1.3 + 2.4 4.12 + 3.6.1 + 1.4.0 + 3.0.2 + 3.0.0 + 1.9.5 + v1-rev10-1.22.0 + 1.7.14 + 2.20 pom From 63cd3ae0c0ed1c6f4b820759d5666be27f241c29 Mon Sep 17 00:00:00 2001 From: Bill Neubauer Date: Wed, 11 Oct 2017 11:56:07 -0700 
Subject: [PATCH 02/25] Set executor version to 1.5.0 Leaving the executor version where it was, rather than changing it to match Beam. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a54ec1c5e9..e4b917f0cf 100644 --- a/pom.xml +++ b/pom.xml @@ -117,7 +117,7 @@ 2.4 4.12 3.6.1 - 1.4.0 + 1.5.0 3.0.2 3.0.0 1.9.5 From 8a1b42be31802e40571566dc285bf1b37f6555bd Mon Sep 17 00:00:00 2001 From: Kenneth Knowles Date: Wed, 6 Dec 2017 15:13:20 -0800 Subject: [PATCH 03/25] Upgrade Beam to version 2.2.0 --- .../src/main/java/WordCount.java | 4 ++++ .../main/java/common/WriteOneFilePerWindow.java | 8 ++++---- .../java/complete/game/injector/Injector.java | 2 +- .../complete/game/injector/InjectorUtils.java | 2 +- .../java/complete/game/utils/WriteToText.java | 8 ++++---- .../src/test/java/DebuggingWordCountTest.java | 11 +++++++++-- .../src/main/java/WordCount.java | 4 ++++ .../src/test/java/DebuggingWordCountTest.java | 11 +++++++++-- pom.xml | 15 ++++++++------- 9 files changed, 44 insertions(+), 21 deletions(-) diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java index 79b71403b9..9947a26eda 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -21,6 +21,7 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Distribution; import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.Description; @@ -88,9 +89,12 @@ public class WordCount { */ static class ExtractWordsFn extends DoFn { private final Counter emptyLines = 
Metrics.counter(ExtractWordsFn.class, "emptyLines"); + private final Distribution lineLenDist = Metrics.distribution( + ExtractWordsFn.class, "lineLenDistro"); @ProcessElement public void processElement(ProcessContext c) { + lineLenDist.update(c.element().length()); if (c.element().trim().isEmpty()) { emptyLines.inc(); } diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java index c7296162b6..9796d647b5 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java @@ -91,10 +91,10 @@ public String filenamePrefixForWindow(IntervalWindow window) { @Override public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { + int numShards, + BoundedWindow window, + PaneInfo paneInfo, + OutputFileHints outputFileHints) { IntervalWindow intervalWindow = (IntervalWindow) window; String filename = String.format( diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java index 4814ffb66f..980966e0ce 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java @@ -167,7 +167,7 @@ long getStartTimeInMillis() { return startTimeInMillis; } long getEndTimeInMillis() { - return startTimeInMillis + (expirationPeriod 
* 60 * 1000); + return startTimeInMillis + (expirationPeriod * 60L * 1000L); } String getRandomUser() { int userNum = random.nextInt(numMembers); diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java index 55e8c7a8c3..ddcbff4f41 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java @@ -93,7 +93,7 @@ public static void createTopic(Pubsub client, String fullTopicName) Topic topic = client.projects().topics() .create(fullTopicName, new Topic()) .execute(); - System.out.printf("Topic %s was created.\n", topic.getName()); + System.out.printf("Topic %s was created.%n", topic.getName()); } } } diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java index 7d8d19f70d..dbd5e39977 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java @@ -145,10 +145,10 @@ public String filenamePrefixForWindow(IntervalWindow window) { @Override public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { + int numShards, + BoundedWindow window, + PaneInfo paneInfo, + OutputFileHints outputFileHints) { IntervalWindow intervalWindow = (IntervalWindow) window; String filename = 
String.format( diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java index 155242d996..26e1498d71 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java @@ -35,6 +35,13 @@ public class DebuggingWordCountTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + private String getFilePath(String filePath) { + if (filePath.contains(":")) { + return filePath.replace("\\", "/").split(":")[1]; + } + return filePath; + } + @Test public void testDebuggingWordCount() throws Exception { File inputFile = tmpFolder.newFile(); @@ -45,8 +52,8 @@ public void testDebuggingWordCount() throws Exception { StandardCharsets.UTF_8); WordCountOptions options = TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(inputFile.getAbsolutePath()); - options.setOutput(outputFile.getAbsolutePath()); + options.setInputFile(getFilePath(inputFile.getAbsolutePath())); + options.setOutput(getFilePath(outputFile.getAbsolutePath())); DebuggingWordCount.main(TestPipeline.convertToArgs(options)); } } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java index 79b71403b9..9947a26eda 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -21,6 +21,7 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.metrics.Counter; +import 
org.apache.beam.sdk.metrics.Distribution; import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.Description; @@ -88,9 +89,12 @@ public class WordCount { */ static class ExtractWordsFn extends DoFn { private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines"); + private final Distribution lineLenDist = Metrics.distribution( + ExtractWordsFn.class, "lineLenDistro"); @ProcessElement public void processElement(ProcessContext c) { + lineLenDist.update(c.element().length()); if (c.element().trim().isEmpty()) { emptyLines.inc(); } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java index 155242d996..26e1498d71 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java @@ -35,6 +35,13 @@ public class DebuggingWordCountTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + private String getFilePath(String filePath) { + if (filePath.contains(":")) { + return filePath.replace("\\", "/").split(":")[1]; + } + return filePath; + } + @Test public void testDebuggingWordCount() throws Exception { File inputFile = tmpFolder.newFile(); @@ -45,8 +52,8 @@ public void testDebuggingWordCount() throws Exception { StandardCharsets.UTF_8); WordCountOptions options = TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(inputFile.getAbsolutePath()); - options.setOutput(outputFile.getAbsolutePath()); + options.setInputFile(getFilePath(inputFile.getAbsolutePath())); + options.setOutput(getFilePath(outputFile.getAbsolutePath())); DebuggingWordCount.main(TestPipeline.convertToArgs(options)); } } diff 
--git a/pom.xml b/pom.xml index e4b917f0cf..a3ce24f916 100644 --- a/pom.xml +++ b/pom.xml @@ -103,27 +103,28 @@ ${maven.build.timestamp} yyyy-MM-dd HH:mm - 2.2.0-SNAPSHOT + 2.2.0 Google Cloud Dataflow SDK for Java ${project.version}-20170517 6 1 - v2-rev295-1.22.0 + v2-rev355-1.22.0 1.22.0 20.0 1.3 2.4 4.12 - 3.6.1 - 1.5.0 - 3.0.2 - 3.0.0 1.9.5 v1-rev10-1.22.0 - 1.7.14 + 1.7.25 + 2.20 + 3.6.2 + 1.6.0 + 3.0.2 + 3.0.0 pom From a363bb33af4a8de572de15c1344fbc3e4133220a Mon Sep 17 00:00:00 2001 From: Batkhuyag Batsaikhan Date: Mon, 26 Feb 2018 17:51:59 -0800 Subject: [PATCH 04/25] Upgrade to Apache Beam, version 2.3.0 --- examples/pom.xml | 5 - .../dataflow/sdk/ExamplesDependencies.java | 4 +- maven-archetypes/examples-java8/pom.xml | 80 ---- .../META-INF/maven/archetype-metadata.xml | 38 -- .../examples-java8/src/main/resources/NOTICE | 5 - .../resources/archetype-resources/pom.xml | 263 ------------ .../src/main/java/DebuggingWordCount.java | 162 ------- .../src/main/java/MinimalWordCount.java | 119 ----- .../src/main/java/MinimalWordCountJava8.java | 72 ---- .../src/main/java/WindowedWordCount.java | 223 ---------- .../src/main/java/WordCount.java | 190 -------- .../common/ExampleBigQueryTableOptions.java | 55 --- ...mplePubsubTopicAndSubscriptionOptions.java | 45 -- .../common/ExamplePubsubTopicOptions.java | 45 -- .../src/main/java/common/ExampleUtils.java | 406 ------------------ .../java/common/WriteOneFilePerWindow.java | 117 ----- .../src/test/java/DebuggingWordCountTest.java | 59 --- .../src/test/java/WordCountTest.java | 86 ---- .../projects/basic/archetype.properties | 19 - .../test/resources/projects/basic/goal.txt | 1 - maven-archetypes/examples/pom.xml | 6 +- .../META-INF/maven/archetype-metadata.xml | 2 +- .../resources/archetype-resources/pom.xml | 13 +- .../src/main/java/MinimalWordCount.java | 88 ++-- .../src/main/java/WindowedWordCount.java | 7 +- .../src/main/java/WordCount.java | 3 +- .../src/main/java/common/ExampleUtils.java | 63 +-- 
.../main/java/complete/game/GameStats.java | 172 ++++---- .../java/complete/game/HourlyTeamScore.java | 80 ++-- .../main/java/complete/game/LeaderBoard.java | 50 +-- .../java/complete/game/StatefulTeamScore.java | 227 ++++++++++ .../main/java/complete/game/UserScore.java | 18 +- .../java/complete/game/injector/Injector.java | 91 ++-- .../complete/game/injector/InjectorUtils.java | 0 .../injector/RetryHttpInitializerWrapper.java | 42 +- .../complete/game/utils/GameConstants.java} | 26 +- .../complete/game/utils/WriteToBigQuery.java | 0 .../java/complete/game/utils/WriteToText.java | 2 +- .../game/utils/WriteWindowedToBigQuery.java | 0 .../src/test/java/MinimalWordCountTest.java} | 57 +-- .../src/test/java/WordCountTest.java | 3 +- .../java/complete/game/GameStatsTest.java | 0 .../complete/game/HourlyTeamScoreTest.java | 0 .../java/complete/game/LeaderBoardTest.java | 16 +- .../complete/game/StatefulTeamScoreTest.java | 208 +++++++++ .../java/complete/game/UserScoreTest.java | 0 .../projects/basic/archetype.properties | 2 +- maven-archetypes/pom.xml | 1 - maven-archetypes/starter/pom.xml | 6 +- .../META-INF/maven/archetype-metadata.xml | 2 +- .../projects/basic/archetype.properties | 2 +- .../projects/basic/reference/pom.xml | 4 +- pom.xml | 85 ++-- 53 files changed, 840 insertions(+), 2430 deletions(-) delete mode 100644 maven-archetypes/examples-java8/pom.xml delete mode 100644 maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml delete mode 100644 maven-archetypes/examples-java8/src/main/resources/NOTICE delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java delete mode 100644 
maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java delete mode 100644 maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java delete mode 100644 maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties delete mode 100644 maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java (68%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java (75%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java (87%) create mode 100644 
maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java (94%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java (89%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java (100%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java (71%) rename maven-archetypes/{examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java => examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java} (55%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java (100%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java (99%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java (100%) rename maven-archetypes/{examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java => examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java} (61%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java (100%) rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java (100%) rename maven-archetypes/{examples-java8 => 
examples}/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java (97%) create mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java rename maven-archetypes/{examples-java8 => examples}/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java (100%) diff --git a/examples/pom.xml b/examples/pom.xml index f87ae36b1d..75a1d92174 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -42,10 +42,5 @@ org.apache.beam beam-examples-java - - - org.apache.beam - beam-examples-java8 - diff --git a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java index 827aff8395..c51e527edb 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java +++ b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java @@ -15,7 +15,7 @@ */ package com.google.cloud.dataflow.sdk; -import org.apache.beam.examples.MinimalWordCountJava8; +import org.apache.beam.examples.MinimalWordCount; import org.apache.beam.examples.WordCount; /** @@ -25,5 +25,5 @@ class ExamplesDependencies { SdkDependencies sdkDependencies; WordCount wordCount; - MinimalWordCountJava8 minimalWordCount; + MinimalWordCount minimalWordCount; } diff --git a/maven-archetypes/examples-java8/pom.xml b/maven-archetypes/examples-java8/pom.xml deleted file mode 100644 index 463c66f1d1..0000000000 --- a/maven-archetypes/examples-java8/pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-examples-java8 - Google Cloud Dataflow SDK for Java - Java 8 Examples Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud 
Dataflow - service. This archetype creates a project containing all the example - pipelines targeting Java 8. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - - - - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - - - - - - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 326fdaa528..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - src/main/java - - **/*.java - - - - - src/test/java - - **/*.java - - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/NOTICE b/maven-archetypes/examples-java8/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 12d2783a0f..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,263 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - jar - - - UTF-8 - - @bigquery.version@ - @google-clients.version@ - @guava.version@ - @hamcrest.version@ - @jackson.version@ - @joda.version@ - @junit.version@ - @maven-compiler-plugin.version@ - @maven-exec-plugin.version@ - @maven-jar-plugin.version@ - @maven-shade-plugin.version@ - @mockito.version@ - @pubsub.version@ - @slf4j.version@ - @surefire-plugin.version@ - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 1.8 - 1.8 - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${surefire-plugin.version} - - all - 4 - true - - - - org.apache.maven.surefire - surefire-junit47 - ${surefire-plugin.version} - - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven-jar-plugin.version} - - - - - org.apache.maven.plugins - maven-shade-plugin - ${maven-shade-plugin.version} - - - package - - shade - - - ${project.artifactId}-bundled-${project.version} - - - *:* - - META-INF/LICENSE - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - ${maven-exec-plugin.version} - - false - - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - com.google.api-client - google-api-client - ${google-clients.version} - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-bigquery - ${bigquery.version} - - - - com.google.guava - 
guava-jdk5 - - - - - - com.google.http-client - google-http-client - ${google-clients.version} - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-pubsub - ${pubsub.version} - - - - com.google.guava - guava-jdk5 - - - - - - joda-time - joda-time - ${joda.version} - - - - com.google.guava - guava - ${guava.version} - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - org.slf4j - slf4j-jdk14 - ${slf4j.version} - - runtime - - - - - org.hamcrest - hamcrest-all - ${hamcrest.version} - - - - junit - junit - ${junit.version} - - - - org.mockito - mockito-all - ${mockito.version} - test - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java deleted file mode 100644 index 07870f2ed0..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * An example that verifies word counts in Shakespeare and includes Beam best practices. - * - *

This class, {@link DebuggingWordCount}, is the third in a series of four successively more - * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount} - * and {@link WordCount}. After you've looked at this example, then see the - * {@link WindowedWordCount} pipeline, for introduction of additional concepts. - * - *

Basic concepts, also in the MinimalWordCount and WordCount examples: - * Reading text files; counting a PCollection; executing a Pipeline both locally - * and using a selected runner; defining DoFns. - * - *

New Concepts: - *

- *   1. Logging using SLF4J, even in a distributed environment
- *   2. Creating a custom metric (runners have varying levels of support)
- *   3. Testing your Pipeline via PAssert
- * 
- * - *

To execute this pipeline locally, specify general pipeline configuration: - *

{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * 
- * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - */ -public class DebuggingWordCount { - /** A DoFn that filters for a specific key based upon a regular expression. */ - public static class FilterTextFn extends DoFn, KV> { - /** - * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the - * logger. Depending on your SLF4J configuration, log statements will likely be qualified by - * this name. - * - *

Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J - * configuration that is most appropriate for their logging integration. - */ - private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class); - - private final Pattern filter; - public FilterTextFn(String pattern) { - filter = Pattern.compile(pattern); - } - - /** - * Concept #2: A custom metric can track values in your pipeline as it runs. Each - * runner provides varying levels of support for metrics, and may expose them - * in a dashboard, etc. - */ - private final Counter matchedWords = Metrics.counter(FilterTextFn.class, "matchedWords"); - private final Counter unmatchedWords = Metrics.counter(FilterTextFn.class, "unmatchedWords"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (filter.matcher(c.element().getKey()).matches()) { - // Log at the "DEBUG" level each element that we match. When executing this pipeline - // these log lines will appear only if the log level is set to "DEBUG" or lower. - LOG.debug("Matched: " + c.element().getKey()); - matchedWords.inc(); - c.output(c.element()); - } else { - // Log at the "TRACE" level each element that is not matched. Different log levels - // can be used to control the verbosity of logging providing an effective mechanism - // to filter less important information. - LOG.trace("Did not match: " + c.element().getKey()); - unmatchedWords.inc(); - } - } - } - - /** - * Options supported by {@link DebuggingWordCount}. - * - *

Inherits standard configuration options and all options defined in - * {@link WordCount.WordCountOptions}. - */ - public interface WordCountOptions extends WordCount.WordCountOptions { - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - PCollection> filteredWords = - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); - - /** - * Concept #3: PAssert is a set of convenient PTransforms in the style of - * Hamcrest's collection matchers that can be used when writing Pipeline level tests - * to validate the contents of PCollections. PAssert is best used in unit tests - * with small data sets but is demonstrated here as a teaching tool. - * - *

Below we verify that the set of filtered words matches our expected counts. Note - * that PAssert does not provide any output and that successful completion of the - * Pipeline implies that the expectations were met. Learn more at - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ on how to test - * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test. - */ - List> expectedResults = Arrays.asList( - KV.of("Flourish", 3L), - KV.of("stomach", 1L)); - PAssert.that(filteredWords).containsInAnyOrder(expectedResults); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java deleted file mode 100644 index d6b08066db..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; - - -/** - * An example that counts words in Shakespeare. - * - *

This class, {@link MinimalWordCount}, is the first in a series of four successively more - * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or - * argument processing, and focus on construction of the pipeline, which chains together the - * application of core transforms. - * - *

Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the - * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional - * concepts. - * - *

Concepts: - * - *

- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting items in a PCollection
- *   4. Writing data to text files
- * 
- * - *

No arguments are required to run this pipeline. It will be executed with the DirectRunner. You - * can see the results in the output files in your current working directory, with names like - * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate - * file service. - */ -public class MinimalWordCount { - - public static void main(String[] args) { - // Create a PipelineOptions object. This object lets us set various execution - // options for our pipeline, such as the runner you wish to use. This example - // will run with the DirectRunner by default, based on the class path configured - // in its dependencies. - PipelineOptions options = PipelineOptionsFactory.create(); - - // Create the Pipeline object with the options we defined above. - Pipeline p = Pipeline.create(options); - - // Apply the pipeline's transforms. - - // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set - // of input text files. TextIO.Read returns a PCollection where each element is one line from - // the input text (a set of Shakespeare's texts). - - // This example reads a public data set consisting of the complete works of Shakespeare. - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - - // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a - // DoFn (defined in-line) on each element that tokenizes the text line into individual words. - // The ParDo returns a PCollection, where each element is an individual word in - // Shakespeare's collected texts. - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split(ExampleUtils.TOKENIZER_PATTERN)) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - - // Concept #3: Apply the Count transform to our PCollection of individual words. 
The Count - // transform returns a new PCollection of key/value pairs, where each key represents a unique - // word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - - // Apply a MapElements transform that formats our PCollection of word counts into a printable - // string, suitable for writing to an output file. - .apply("FormatResults", MapElements.via(new SimpleFunction, String>() { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - })) - - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. - // - // By default, it will write to a set of files with names like wordcount-00001-of-00005 - .apply(TextIO.write().to("wordcounts")); - - // Run the pipeline. - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java deleted file mode 100644 index e635a885b7..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.FlatMapElements; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptors; - -/** - * An example that counts words in Shakespeare, using Java 8 language features. - * - *

See {@link MinimalWordCount} for a comprehensive explanation. - */ -public class MinimalWordCountJava8 { - - public static void main(String[] args) { - PipelineOptions options = PipelineOptionsFactory.create(); - // In order to run your pipeline, you need to make following runner specific changes: - // - // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner - // or FlinkRunner. - // CHANGE 2/3: Specify runner-required options. - // For BlockingDataflowRunner, set project and temp location as follows: - // DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); - // dataflowOptions.setRunner(BlockingDataflowRunner.class); - // dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE"); - // dataflowOptions.setTempLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY"); - // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions} - // for more details. - // options.as(FlinkPipelineOptions.class) - // .setRunner(FlinkRunner.class); - - Pipeline p = Pipeline.create(options); - - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - .apply(FlatMapElements - .into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^\\p{L}]+")))) - .apply(Filter.by((String word) -> !word.isEmpty())) - .apply(Count.perElement()) - .apply(MapElements - .into(TypeDescriptors.strings()) - .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) - // CHANGE 3/3: The Google Cloud Storage path is required for outputting the results to. 
- .apply(TextIO.write().to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX")); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java deleted file mode 100644 index 6a1d07c485..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.io.IOException; -import java.util.concurrent.ThreadLocalRandom; -import ${package}.common.ExampleBigQueryTableOptions; -import ${package}.common.ExampleOptions; -import ${package}.common.WriteOneFilePerWindow; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - - -/** - * An example that counts words in text, and can run over either unbounded or bounded input - * collections. - * - *

This class, {@link WindowedWordCount}, is the last in a series of four successively more - * detailed 'word count' examples. First take a look at {@link MinimalWordCount}, - * {@link WordCount}, and {@link DebuggingWordCount}. - * - *

Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples: - * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally - * and using a selected runner; defining DoFns; - * user-defined PTransforms; defining PipelineOptions. - * - *

New Concepts: - *

- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. Windowing
- *   4. Re-using PTransforms over windowed PCollections
- *   5. Accessing the window of an element
- *   6. Writing data to per-window text files
- * 
- * - *

By default, the examples will run with the {@code DirectRunner}. - * To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * See examples/java/README.md for instructions about how to configure different runners. - * - *

To execute this pipeline locally, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - *

By default, the pipeline will do fixed windowing, on 10-minute windows. You can - * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=15} - * for 15-minute windows. - * - *

The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C). - */ -public class WindowedWordCount { - static final int WINDOW_SIZE = 10; // Default window duration in minutes - /** - * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for - * this example, for the bounded data case. - * - *

Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate - * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a - * 2-hour period. - */ - static class AddTimestampFn extends DoFn { - private static final Duration RAND_RANGE = Duration.standardHours(1); - private final Instant minTimestamp; - private final Instant maxTimestamp; - - AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) { - this.minTimestamp = minTimestamp; - this.maxTimestamp = maxTimestamp; - } - - @ProcessElement - public void processElement(ProcessContext c) { - Instant randomTimestamp = - new Instant( - ThreadLocalRandom.current() - .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis())); - - /** - * Concept #2: Set the data element with that timestamp. - */ - c.outputWithTimestamp(c.element(), new Instant(randomTimestamp)); - } - } - - /** A {@link DefaultValueFactory} that returns the current system time. */ - public static class DefaultToCurrentSystemTime implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return System.currentTimeMillis(); - } - } - - /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */ - public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return options.as(Options.class).getMinTimestampMillis() - + Duration.standardHours(1).getMillis(); - } - } - - /** - * Options for {@link WindowedWordCount}. - * - *

Inherits standard example configuration options, which allow specification of the - * runner, as well as the {@link WordCount.WordCountOptions} support for - * specification of the input and output files. - */ - public interface Options extends WordCount.WordCountOptions, - ExampleOptions, ExampleBigQueryTableOptions { - @Description("Fixed window duration, in minutes") - @Default.Integer(WINDOW_SIZE) - Integer getWindowSize(); - void setWindowSize(Integer value); - - @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToCurrentSystemTime.class) - Long getMinTimestampMillis(); - void setMinTimestampMillis(Long value); - - @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class) - Long getMaxTimestampMillis(); - void setMaxTimestampMillis(Long value); - - @Description("Fixed number of shards to produce per window, or null for runner-chosen sharding") - Integer getNumShards(); - void setNumShards(Integer numShards); - } - - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - final String output = options.getOutput(); - final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); - final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); - - Pipeline pipeline = Pipeline.create(options); - - /** - * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or - * unbounded input source. - */ - PCollection input = pipeline - /** Read from the GCS file. */ - .apply(TextIO.read().from(options.getInputFile())) - // Concept #2: Add an element timestamp, using an artificial time just to show windowing. - // See AddTimestampFn for more detail on this. - .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp))); - - /** - * Concept #3: Window into fixed windows. 
The fixed window size for this example defaults to 1 - * minute (you can change this with a command-line option). See the documentation for more - * information on how fixed windows work, and for information on the other types of windowing - * available (e.g., sliding windows). - */ - PCollection windowedWords = - input.apply( - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); - - /** - * Concept #4: Re-use our existing CountWords transform that does not have knowledge of - * windows over a PCollection containing windowed values. - */ - PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords()); - - /** - * Concept #5: Format the results and write to a sharded file partitioned by window, using a - * simple ParDo operation. Because there may be failures followed by retries, the - * writes must be idempotent, but the details of writing to files is elided here. - */ - wordCounts - .apply(MapElements.via(new WordCount.FormatAsTextFn())) - .apply(new WriteOneFilePerWindow(output, options.getNumShards())); - - PipelineResult result = pipeline.run(); - try { - result.waitUntilFinish(); - } catch (Exception exc) { - result.cancel(); - } - } - -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java deleted file mode 100644 index 9947a26eda..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Distribution; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation.Required; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * An example that counts words in Shakespeare and includes Beam best practices. - * - *

This class, {@link WordCount}, is the second in a series of four successively more detailed - * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}. - * After you've looked at this example, then see the {@link DebuggingWordCount} - * pipeline, for introduction of additional concepts. - * - *

For a detailed walkthrough of this example, see - * - * https://beam.apache.org/get-started/wordcount-example/ - * - * - *

Basic concepts, also in the MinimalWordCount example: - * Reading text files; counting a PCollection; writing to text files - * - *

New Concepts: - *

- *   1. Executing a Pipeline both locally and using the selected runner
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * 
- * - *

Concept #1: you can execute this pipeline either locally or by selecting another runner. - * These are now command-line options and not hard-coded as they were in the MinimalWordCount - * example. - * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

To execute this pipeline, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - */ -public class WordCount { - - /** - * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns - * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it - * to a ParDo in the pipeline. - */ - static class ExtractWordsFn extends DoFn { - private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines"); - private final Distribution lineLenDist = Metrics.distribution( - ExtractWordsFn.class, "lineLenDistro"); - - @ProcessElement - public void processElement(ProcessContext c) { - lineLenDist.update(c.element().length()); - if (c.element().trim().isEmpty()) { - emptyLines.inc(); - } - - // Split the line into words. - String[] words = c.element().split(ExampleUtils.TOKENIZER_PATTERN); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - /** A SimpleFunction that converts a Word and Count into a printable string. */ - public static class FormatAsTextFn extends SimpleFunction, String> { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - } - - /** - * A PTransform that converts a PCollection containing lines of text into a PCollection of - * formatted word counts. - * - *

Concept #3: This is a custom composite transform that bundles two transforms (ParDo and - * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse, - * modular testing, and an improved monitoring experience. - */ - public static class CountWords extends PTransform, - PCollection>> { - @Override - public PCollection> expand(PCollection lines) { - - // Convert lines of text into individual words. - PCollection words = lines.apply( - ParDo.of(new ExtractWordsFn())); - - // Count the number of times each word occurs. - PCollection> wordCounts = - words.apply(Count.perElement()); - - return wordCounts; - } - } - - /** - * Options supported by {@link WordCount}. - * - *

Concept #4: Defining your own configuration options. Here, you can add your own arguments - * to be processed by the command-line parser, and specify default values for them. You can then - * access the options values in your pipeline code. - * - *

Inherits standard configuration options. - */ - public interface WordCountOptions extends PipelineOptions { - - /** - * By default, this example reads from a public dataset containing the text of - * King Lear. Set this option to choose a different input file or glob. - */ - @Description("Path of the file to read from") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInputFile(); - void setInputFile(String value); - - /** - * Set this required option to specify where to write the output. - */ - @Description("Path of the file to write to") - @Required - String getOutput(); - void setOutput(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the - // static FormatAsTextFn() to the ParDo transform. - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())) - .apply("WriteCounts", TextIO.write().to(options.getOutput())); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java deleted file mode 100644 index 57f1546e27..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.services.bigquery.model.TableSchema; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure BigQuery tables in Beam examples. - * The project defaults to the project being used to run the example. - */ -public interface ExampleBigQueryTableOptions extends GcpOptions { - @Description("BigQuery dataset name") - @Default.String("beam_examples") - String getBigQueryDataset(); - void setBigQueryDataset(String dataset); - - @Description("BigQuery table name") - @Default.InstanceFactory(BigQueryTableFactory.class) - String getBigQueryTable(); - void setBigQueryTable(String table); - - @Description("BigQuery table schema") - TableSchema getBigQuerySchema(); - void setBigQuerySchema(TableSchema schema); - - /** - * Returns the job name as the default BigQuery table name. 
- */ - class BigQueryTableFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return options.getJobName().replace('-', '_'); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java deleted file mode 100644 index cf142a10fd..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic/subscription in Beam examples. 
- */ -public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions { - @Description("Pub/Sub subscription") - @Default.InstanceFactory(PubsubSubscriptionFactory.class) - String getPubsubSubscription(); - void setPubsubSubscription(String subscription); - - /** - * Returns a default Pub/Sub subscription based on the project and the job names. - */ - class PubsubSubscriptionFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/subscriptions/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java deleted file mode 100644 index 86784b06da..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic in Beam examples. - */ -public interface ExamplePubsubTopicOptions extends GcpOptions { - @Description("Pub/Sub topic") - @Default.InstanceFactory(PubsubTopicFactory.class) - String getPubsubTopic(); - void setPubsubTopic(String topic); - - /** - * Returns a default Pub/Sub topic based on the project and the job names. - */ - class PubsubTopicFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/topics/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java deleted file mode 100644 index 78f3849b40..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.Bigquery.Datasets; -import com.google.api.services.bigquery.Bigquery.Tables; -import com.google.api.services.bigquery.model.Dataset; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.Subscription; -import com.google.api.services.pubsub.model.Topic; -import com.google.auth.Credentials; -import com.google.auth.http.HttpCredentialsAdapter; -import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; -import 
org.apache.beam.sdk.io.gcp.pubsub.PubsubOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.RetryHttpRequestInitializer; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.util.Transport; -import org.joda.time.Duration; - -/** - * The utility class that sets up and tears down external resources, - * and cancels the streaming pipelines once the program terminates. - * - *

It is used to run Beam examples. - */ -public class ExampleUtils { - - private static final int SC_NOT_FOUND = 404; - - /** - * \p{L} denotes the category of Unicode letters, - * so this pattern will match on everything that is not a letter. - * - *

It is used for tokenizing strings in the wordcount examples. - */ - public static final String TOKENIZER_PATTERN = "[^\\p{L}]+"; - - private final PipelineOptions options; - private Bigquery bigQueryClient = null; - private Pubsub pubsubClient = null; - private Set pipelinesToCancel = Sets.newHashSet(); - private List pendingMessages = Lists.newArrayList(); - - /** - * Do resources and runner options setup. - */ - public ExampleUtils(PipelineOptions options) { - this.options = options; - } - - /** - * Sets up external resources that are required by the example, - * such as Pub/Sub topics and BigQuery tables. - * - * @throws IOException if there is a problem setting up the resources - */ - public void setup() throws IOException { - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backOff = - FluentBackoff.DEFAULT - .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff(); - Throwable lastException = null; - try { - do { - try { - setupPubsub(); - setupBigQueryTable(); - return; - } catch (GoogleJsonResponseException e) { - lastException = e; - } - } while (BackOffUtils.next(sleeper, backOff)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // Ignore InterruptedException - } - throw new RuntimeException(lastException); - } - - /** - * Sets up the Google Cloud Pub/Sub topic. - * - *

If the topic doesn't exist, a new topic with the given name will be created. - * - * @throws IOException if there is a problem setting up the Pub/Sub topic - */ - public void setupPubsub() throws IOException { - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - pendingMessages.add("**********************Set Up Pubsub************************"); - setupPubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been set up for this example: " - + pubsubOptions.getPubsubTopic()); - - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - setupPubsubSubscription( - pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been set up for this example: " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - /** - * Sets up the BigQuery table with the given schema. - * - *

If the table already exists, the schema has to match the given one. Otherwise, the example - * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema - * will be created. - * - * @throws IOException if there is a problem setting up the BigQuery table - */ - public void setupBigQueryTable() throws IOException { - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("******************Set Up Big Query Table*******************"); - setupBigQueryTable(bigQueryTableOptions.getProject(), - bigQueryTableOptions.getBigQueryDataset(), - bigQueryTableOptions.getBigQueryTable(), - bigQueryTableOptions.getBigQuerySchema()); - pendingMessages.add("The BigQuery table has been set up for this example: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - } - } - - /** - * Tears down external resources that can be deleted upon the example's completion. 
- */ - private void tearDown() { - pendingMessages.add("*************************Tear Down*************************"); - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - try { - deletePubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been deleted: " - + pubsubOptions.getPubsubTopic()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub topic : " - + pubsubOptions.getPubsubTopic()); - } - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - try { - deletePubsubSubscription(pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been deleted: " - + pubsubOptions.getPubsubSubscription()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub subscription : " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("The BigQuery table might contain the example's output, " - + "and it is not deleted automatically: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - pendingMessages.add("Please go to the Developers Console to delete it manually." - + " Otherwise, you may be charged for its usage."); - } - } - - /** - * Returns a BigQuery client builder using the specified {@link BigQueryOptions}. 
- */ - private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { - return new Bigquery.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - /** - * Returns a Pubsub client builder using the specified {@link PubsubOptions}. - */ - private static Pubsub.Builder newPubsubClient(PubsubOptions options) { - return new Pubsub.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setRootUrl(options.getPubsubRootUrl()) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - private static HttpRequestInitializer chainHttpRequestInitializer( - Credentials credential, HttpRequestInitializer httpRequestInitializer) { - if (credential == null) { - return new ChainingHttpRequestInitializer( - new NullCredentialInitializer(), httpRequestInitializer); - } else { - return new ChainingHttpRequestInitializer( - new HttpCredentialsAdapter(credential), - httpRequestInitializer); - } - } - - private void setupBigQueryTable(String projectId, String datasetId, String tableId, - TableSchema schema) throws IOException { - if (bigQueryClient == null) { - bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build(); - } - - Datasets datasetService = bigQueryClient.datasets(); - if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) { - Dataset newDataset = new Dataset().setDatasetReference( - new 
DatasetReference().setProjectId(projectId).setDatasetId(datasetId)); - datasetService.insert(projectId, newDataset).execute(); - } - - Tables tableService = bigQueryClient.tables(); - Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId)); - if (table == null) { - Table newTable = new Table().setSchema(schema).setTableReference( - new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId)); - tableService.insert(projectId, datasetId, newTable).execute(); - } else if (!table.getSchema().equals(schema)) { - throw new RuntimeException( - "Table exists and schemas do not match, expecting: " + schema.toPrettyString() - + ", actual: " + table.getSchema().toPrettyString()); - } - } - - private void setupPubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) { - pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute(); - } - } - - private void setupPubsubSubscription(String topic, String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) { - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); - pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub topic. 
- * - * @throws IOException if there is a problem deleting the Pub/Sub topic - */ - private void deletePubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) { - pubsubClient.projects().topics().delete(topic).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub subscription. - * - * @throws IOException if there is a problem deleting the Pub/Sub subscription - */ - private void deletePubsubSubscription(String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) { - pubsubClient.projects().subscriptions().delete(subscription).execute(); - } - } - - /** - * Waits for the pipeline to finish and cancels it before the program exists. - */ - public void waitToFinish(PipelineResult result) { - pipelinesToCancel.add(result); - if (!options.as(ExampleOptions.class).getKeepJobsRunning()) { - addShutdownHook(pipelinesToCancel); - } - try { - result.waitUntilFinish(); - } catch (UnsupportedOperationException e) { - // Do nothing if the given PipelineResult doesn't support waitUntilFinish(), - // such as EvaluationResults returned by DirectRunner. 
- tearDown(); - printPendingMessages(); - } catch (Exception e) { - throw new RuntimeException("Failed to wait the pipeline until finish: " + result); - } - } - - private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } - - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println("Failed to verify the cancellation for job: " + pipelineResult); - } - } - } - }); - } - - private void printPendingMessages() { - System.out.println(); - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - for (String message : pendingMessages) { - System.out.println(message); - } - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - } - - private static T executeNullIfNotFound( - AbstractGoogleClientRequest request) throws IOException { - try { - return request.execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == SC_NOT_FOUND) { - return null; - } else { - throw e; - } - } - } -} diff --git 
a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java deleted file mode 100644 index 9796d647b5..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import static com.google.common.base.MoreObjects.firstNonNull; - -import javax.annotation.Nullable; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; - -/** - * A {@link DoFn} that writes elements to files with names deterministically derived from the lower - * and upper bounds of their key (an {@link IntervalWindow}). - * - *

This is test utility code, not for end-users, so examples can be focused on their primary - * lessons. - */ -public class WriteOneFilePerWindow extends PTransform, PDone> { - private static final DateTimeFormatter FORMATTER = ISODateTimeFormat.hourMinute(); - private String filenamePrefix; - @Nullable - private Integer numShards; - - public WriteOneFilePerWindow(String filenamePrefix, Integer numShards) { - this.filenamePrefix = filenamePrefix; - this.numShards = numShards; - } - - @Override - public PDone expand(PCollection input) { - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - TextIO.Write write = - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites(); - if (numShards != null) { - write = write.withNumShards(numShards); - } - return input.apply(write); - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - public static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId baseFilename; - - public PerWindowFiles(ResourceId baseFilename) { - this.baseFilename = baseFilename; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String prefix = - baseFilename.isDirectory() ? 
"" : firstNonNull(baseFilename.getFilename(), ""); - return String.format("%s-%s-%s", - prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return baseFilename - .getCurrentDirectory() - .resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java deleted file mode 100644 index 26e1498d71..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.io.Files; -import java.io.File; -import java.nio.charset.StandardCharsets; -import ${package}.DebuggingWordCount.WordCountOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DebuggingWordCount}. - */ -@RunWith(JUnit4.class) -public class DebuggingWordCountTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - private String getFilePath(String filePath) { - if (filePath.contains(":")) { - return filePath.replace("\\", "/").split(":")[1]; - } - return filePath; - } - - @Test - public void testDebuggingWordCount() throws Exception { - File inputFile = tmpFolder.newFile(); - File outputFile = tmpFolder.newFile(); - Files.write( - "stomach secret Flourish message Flourish here Flourish", - inputFile, - StandardCharsets.UTF_8); - WordCountOptions options = - TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(getFilePath(inputFile.getAbsolutePath())); - options.setOutput(getFilePath(outputFile.getAbsolutePath())); - DebuggingWordCount.main(TestPipeline.convertToArgs(options)); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java deleted file mode 100644 index b4e4124e26..0000000000 --- 
a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import ${package}.WordCount.CountWords; -import ${package}.WordCount.ExtractWordsFn; -import ${package}.WordCount.FormatAsTextFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of WordCount. - */ -@RunWith(JUnit4.class) -public class WordCountTest { - - /** Example test that tests a specific {@link DoFn}. 
*/ - @Test - public void testExtractWordsFn() throws Exception { - DoFnTester extractWordsFn = - DoFnTester.of(new ExtractWordsFn()); - - Assert.assertThat(extractWordsFn.processBundle(" some input words "), - CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), - CoreMatchers.hasItems()); - Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), - CoreMatchers.hasItems("some", "input", "words")); - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */ - @Test - @Category(ValidatesRunner.class) - public void testCountWords() throws Exception { - PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of())); - - PCollection output = input.apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())); - - PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index b0195b3f16..0000000000 --- a/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.8 diff --git a/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index cfe47d4d8a..5ff4872335 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -39,7 +39,7 @@ org.apache.maven.archetype archetype-packaging - 2.4 + ${archetype-packaging.version} @@ -47,12 +47,12 @@ maven-archetype-plugin - 2.4 + ${maven-archetype-plugin.version} org.apache.maven.shared maven-invoker - 2.2 + ${maven-invoker.version} diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml index 2b9eb52d80..29f8605cce 100644 --- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -22,7 +22,7 @@ - 1.7 + 1.8 diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index 2c2b8d3fc6..dcbedafd76 100644 --- 
a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -32,13 +32,13 @@ @google-clients.version@ @guava.version@ @hamcrest.version@ - @jackson.version@ @joda.version@ @junit.version@ @maven-compiler-plugin.version@ - @maven-exec-plugin.version@ + @exec-maven-plugin.version@ @maven-jar-plugin.version@ @maven-shade-plugin.version@ + @mockito.version@ @pubsub.version@ @slf4j.version@ @surefire-plugin.version@ @@ -137,7 +137,7 @@ org.codehaus.mojo exec-maven-plugin - ${maven-exec-plugin.version} + ${exec-maven-plugin.version} false @@ -251,5 +251,12 @@ junit ${junit.version} + + + org.mockito + mockito-all + ${mockito.version} + test + diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java index d6b08066db..f1bd8bfaa8 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java @@ -17,18 +17,17 @@ */ package ${package}; -import ${package}.common.ExampleUtils; +import java.util.Arrays; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.Filter; +import org.apache.beam.sdk.transforms.FlatMapElements; import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.values.KV; - +import org.apache.beam.sdk.values.TypeDescriptors; /** * An example that counts words in Shakespeare. 
@@ -59,16 +58,30 @@ public class MinimalWordCount { public static void main(String[] args) { + // Create a PipelineOptions object. This object lets us set various execution // options for our pipeline, such as the runner you wish to use. This example // will run with the DirectRunner by default, based on the class path configured // in its dependencies. PipelineOptions options = PipelineOptionsFactory.create(); - // Create the Pipeline object with the options we defined above. - Pipeline p = Pipeline.create(options); + // In order to run your pipeline, you need to make following runner specific changes: + // + // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner + // or FlinkRunner. + // CHANGE 2/3: Specify runner-required options. + // For BlockingDataflowRunner, set project and temp location as follows: + // DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + // dataflowOptions.setRunner(BlockingDataflowRunner.class); + // dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE"); + // dataflowOptions.setTempLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY"); + // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions} + // for more details. + // options.as(FlinkPipelineOptions.class) + // .setRunner(FlinkRunner.class); - // Apply the pipeline's transforms. + // Create the Pipeline object with the options we defined above + Pipeline p = Pipeline.create(options); // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set // of input text files. TextIO.Read returns a PCollection where each element is one line from @@ -77,43 +90,30 @@ public static void main(String[] args) { // This example reads a public data set consisting of the complete works of Shakespeare. p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - // Concept #2: Apply a ParDo transform to our PCollection of text lines. 
This ParDo invokes a - // DoFn (defined in-line) on each element that tokenizes the text line into individual words. - // The ParDo returns a PCollection, where each element is an individual word in - // Shakespeare's collected texts. - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split(ExampleUtils.TOKENIZER_PATTERN)) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - - // Concept #3: Apply the Count transform to our PCollection of individual words. The Count - // transform returns a new PCollection of key/value pairs, where each key represents a unique - // word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - - // Apply a MapElements transform that formats our PCollection of word counts into a printable - // string, suitable for writing to an output file. - .apply("FormatResults", MapElements.via(new SimpleFunction, String>() { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - })) - - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. - // - // By default, it will write to a set of files with names like wordcount-00001-of-00005 - .apply(TextIO.write().to("wordcounts")); + // Concept #2: Apply a FlatMapElements transform the PCollection of text lines. + // This transform splits the lines in PCollection, where each element is an + // individual word in Shakespeare's collected texts. 
+ .apply(FlatMapElements + .into(TypeDescriptors.strings()) + .via((String word) -> Arrays.asList(word.split("[^\\p{L}]+")))) + // We use a Filter transform to avoid empty word + .apply(Filter.by((String word) -> !word.isEmpty())) + // Concept #3: Apply the Count transform to our PCollection of individual words. The Count + // transform returns a new PCollection of key/value pairs, where each key represents a + // unique word in the text. The associated value is the occurrence count for that word. + .apply(Count.perElement()) + // Apply a MapElements transform that formats our PCollection of word counts into a + // printable string, suitable for writing to an output file. + .apply(MapElements + .into(TypeDescriptors.strings()) + .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) + // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. + // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of + // formatted strings) to a series of text files. + // + // By default, it will write to a set of files with names like wordcounts-00001-of-00005 + .apply(TextIO.write().to("wordcounts")); - // Run the pipeline. p.run().waitUntilFinish(); } } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java index 6a1d07c485..501ac27881 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java @@ -40,7 +40,6 @@ import org.joda.time.Duration; import org.joda.time.Instant; - /** * An example that counts words in text, and can run over either unbounded or bounded input * collections. @@ -98,7 +97,6 @@ public class WindowedWordCount { * 2-hour period. 
*/ static class AddTimestampFn extends DoFn { - private static final Duration RAND_RANGE = Duration.standardHours(1); private final Instant minTimestamp; private final Instant maxTimestamp; @@ -162,7 +160,7 @@ public interface Options extends WordCount.WordCountOptions, Long getMaxTimestampMillis(); void setMaxTimestampMillis(Long value); - @Description("Fixed number of shards to produce per window, or null for runner-chosen sharding") + @Description("Fixed number of shards to produce per window") Integer getNumShards(); void setNumShards(Integer numShards); } @@ -194,8 +192,7 @@ public static void main(String[] args) throws IOException { */ PCollection windowedWords = input.apply( - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); + Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); /** * Concept #4: Re-use our existing CountWords transform that does not have knowledge of diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java index 9947a26eda..33f7b39f19 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -137,8 +137,7 @@ public PCollection> expand(PCollection lines) { ParDo.of(new ExtractWordsFn())); // Count the number of times each word occurs. 
- PCollection> wordCounts = - words.apply(Count.perElement()); + PCollection> wordCounts = words.apply(Count.perElement()); return wordCounts; } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java index 78f3849b40..e1159b9018 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java @@ -346,38 +346,39 @@ public void waitToFinish(PipelineResult result) { } private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + tearDown(); + printPendingMessages(); + for (PipelineResult pipelineResult : pipelineResults) { + try { + pipelineResult.cancel(); + } catch (IOException e) { + System.out.println("Failed to cancel the job."); + System.out.println(e.getMessage()); + } + } - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. 
Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println("Failed to verify the cancellation for job: " + pipelineResult); - } - } - } - }); + for (PipelineResult pipelineResult : pipelineResults) { + boolean cancellationVerified = false; + for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { + if (pipelineResult.getState().isTerminal()) { + cancellationVerified = true; + break; + } else { + System.out.println( + "The example pipeline is still running. Verifying the cancellation."); + } + Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); + } + if (!cancellationVerified) { + System.out.println( + "Failed to verify the cancellation for job: " + pipelineResult); + } + } + })); } private void printPendingMessages() { diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java similarity index 68% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java index a286811293..3cb04bd2e4 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java @@ -19,8 +19,8 @@ import java.util.HashMap; import java.util.Map; -import java.util.TimeZone; import ${package}.common.ExampleUtils; +import ${package}.complete.game.utils.GameConstants; import ${package}.complete.game.utils.WriteWindowedToBigQuery; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; @@ -50,11 +50,8 @@ import 
org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.DateTimeZone; import org.joda.time.Duration; import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,14 +90,8 @@ */ public class GameStats extends LeaderBoard { - private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - /** - * Filter out all but those users with a high clickrate, which we will consider as 'spammy' uesrs. + * Filter out all users but those with a high clickrate, which we will consider as 'spammy' users. * We do this by finding the mean total score per user, then using that information as a side * input to filter out all but those user scores that are larger than * {@code (mean * SCORE_WEIGHT)}. @@ -115,12 +106,12 @@ public static class CalculateSpammyUsers public PCollection> expand(PCollection> userScores) { // Get the sum of scores for each user. - PCollection> sumScores = userScores - .apply("UserSum", Sum.integersPerKey()); + PCollection> sumScores = + userScores.apply("UserSum", Sum.integersPerKey()); // Extract the score from each element, and use it to find the global mean. - final PCollectionView globalMeanScore = sumScores.apply(Values.create()) - .apply(Mean.globally().asSingletonView()); + final PCollectionView globalMeanScore = + sumScores.apply(Values.create()).apply(Mean.globally().asSingletonView()); // Filter the user sums using the global mean. 
PCollection> filtered = sumScores @@ -193,27 +184,24 @@ interface Options extends LeaderBoard.Options { protected static Map>> configureWindowedWrite() { Map>> tableConfigure = - new HashMap>>(); + new HashMap<>(); tableConfigure.put( - "team", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.element().getKey())); + "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); tableConfigure.put( "total_score", - new WriteWindowedToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); + new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); tableConfigure.put( "window_start", - new WriteWindowedToBigQuery.FieldInfo>( + new WriteWindowedToBigQuery.FieldInfo<>( "STRING", (c, w) -> { IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); + return GameConstants.DATE_TIME_FORMATTER.print(window.start()); })); tableConfigure.put( "processing_time", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); + new WriteWindowedToBigQuery.FieldInfo<>( + "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); return tableConfigure; } @@ -224,19 +212,17 @@ interface Options extends LeaderBoard.Options { protected static Map> configureSessionWindowWrite() { - Map> tableConfigure = - new HashMap>(); + Map> tableConfigure = new HashMap<>(); tableConfigure.put( "window_start", - new WriteWindowedToBigQuery.FieldInfo( + new WriteWindowedToBigQuery.FieldInfo<>( "STRING", (c, w) -> { IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); + return GameConstants.DATE_TIME_FORMATTER.print(window.start()); })); tableConfigure.put( - "mean_duration", - new WriteWindowedToBigQuery.FieldInfo("FLOAT", (c, w) -> c.element())); + "mean_duration", new WriteWindowedToBigQuery.FieldInfo<>("FLOAT", (c, w) -> c.element())); return tableConfigure; } @@ -253,7 +239,8 @@ public static void main(String[] 
args) throws Exception { // Read Events from Pub/Sub using custom timestamps PCollection rawEvents = pipeline .apply(PubsubIO.readStrings() - .withTimestampAttribute(TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())) + .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) + .fromTopic(options.getTopic())) .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); // Extract username/score pairs from the event stream @@ -265,16 +252,19 @@ public static void main(String[] args) throws Exception { // Calculate the total score per user over fixed windows, and // cumulative updates for late data. - final PCollectionView> spammersView = userEvents - .apply("FixedWindowsUser", Window.>into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - - // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate. - // These might be robots/spammers. - .apply("CalculateSpammyUsers", new CalculateSpammyUsers()) - // Derive a view from the collection of spammer users. It will be used as a side input - // in calculating the team score sums, below. - .apply("CreateSpammersView", View.asMap()); + final PCollectionView> spammersView = + userEvents + .apply( + "FixedWindowsUser", + Window.into( + FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) + + // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate. + // These might be robots/spammers. + .apply("CalculateSpammyUsers", new CalculateSpammyUsers()) + // Derive a view from the collection of spammer users. It will be used as a side input + // in calculating the team score sums, below. + .apply("CreateSpammersView", View.asMap()); // [START DocInclude_FilterAndCalc] // Calculate the total score per team over fixed windows, @@ -282,29 +272,35 @@ public static void main(String[] args) throws Exception { // suspected robots-- to filter out scores from those users from the sum. // Write the results to BigQuery. 
rawEvents - .apply("WindowIntoFixedWindows", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - // Filter out the detected spammer users, using the side input derived above. - .apply("FilterOutSpammers", ParDo - .of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - // If the user is not in the spammers Map, output the data element. - if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) { - c.output(c.element()); - } - } - }).withSideInputs(spammersView)) + .apply( + "WindowIntoFixedWindows", + Window.into( + FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) + // Filter out the detected spammer users, using the side input derived above. + .apply( + "FilterOutSpammers", + ParDo.of( + new DoFn() { + @ProcessElement + public void processElement(ProcessContext c) { + // If the user is not in the spammers Map, output the data element. + if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) { + c.output(c.element()); + } + } + }) + .withSideInputs(spammersView)) // Extract and sum teamname/score pairs from the event data. 
- .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - // [END DocInclude_FilterAndCalc] - // Write the result to BigQuery - .apply("WriteTeamSums", - new WriteWindowedToBigQuery>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_team", configureWindowedWrite())); - + .apply("ExtractTeamScore", new ExtractAndSumScore("team")) + // [END DocInclude_FilterAndCalc] + // Write the result to BigQuery + .apply( + "WriteTeamSums", + new WriteWindowedToBigQuery<>( + options.as(GcpOptions.class).getProject(), + options.getDataset(), + options.getGameStatsTablePrefix() + "_team", + configureWindowedWrite())); // [START DocInclude_SessionCalc] // Detect user sessions-- that is, a burst of activity separated by a gap from further @@ -312,27 +308,33 @@ public void processElement(ProcessContext c) { // This information could help the game designers track the changing user engagement // as their set of games changes. userEvents - .apply("WindowIntoSessions", Window.>into( - Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))) - .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)) - // For this use, we care only about the existence of the session, not any particular - // information aggregated over it, so the following is an efficient way to do that. - .apply(Combine.perKey(x -> 0)) - // Get the duration per session. - .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())) - // [END DocInclude_SessionCalc] - // [START DocInclude_Rewindow] - // Re-window to process groups of session sums according to when the sessions complete. - .apply("WindowToExtractSessionMean", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))) - // Find the mean session duration in each window. - .apply(Mean.globally().withoutDefaults()) - // Write this info to a BigQuery table. 
- .apply("WriteAvgSessionLength", - new WriteWindowedToBigQuery( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_sessions", configureSessionWindowWrite())); + .apply( + "WindowIntoSessions", + Window.>into( + Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))) + .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)) + // For this use, we care only about the existence of the session, not any particular + // information aggregated over it, so the following is an efficient way to do that. + .apply(Combine.perKey(x -> 0)) + // Get the duration per session. + .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())) + // [END DocInclude_SessionCalc] + // [START DocInclude_Rewindow] + // Re-window to process groups of session sums according to when the sessions complete. + .apply( + "WindowToExtractSessionMean", + Window.into( + FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))) + // Find the mean session duration in each window. + .apply(Mean.globally().withoutDefaults()) + // Write this info to a BigQuery table. 
+ .apply( + "WriteAvgSessionLength", + new WriteWindowedToBigQuery<>( + options.as(GcpOptions.class).getProject(), + options.getDataset(), + options.getGameStatsTablePrefix() + "_sessions", + configureSessionWindowWrite())); // [END DocInclude_Rewindow] diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java similarity index 75% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java index e60af492e4..fe1fe99da7 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java @@ -20,6 +20,7 @@ import java.util.HashMap; import java.util.Map; import java.util.TimeZone; +import ${package}.complete.game.utils.GameConstants; import ${package}.complete.game.utils.WriteToText; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; @@ -73,9 +74,6 @@ */ public class HourlyTeamScore extends UserScore { - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); private static DateTimeFormatter minFmt = DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm") .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); @@ -113,15 +111,14 @@ interface Options extends UserScore.Options { */ protected static Map>> configureOutput() { - Map>> config = - new HashMap>>(); + Map>> config = new HashMap<>(); config.put("team", (c, w) -> c.element().getKey()); config.put("total_score", (c, w) -> 
c.element().getValue()); config.put( "window_start", (c, w) -> { IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); + return GameConstants.DATE_TIME_FORMATTER.print(window.start()); }); return config; } @@ -140,40 +137,43 @@ public static void main(String[] args) throws Exception { final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin())); // Read 'gaming' events from a text file. - pipeline.apply(TextIO.read().from(options.getInput())) - // Parse the incoming data. - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - - // Filter out data before and after the given times so that it is not included - // in the calculations. As we collect data in batches (say, by day), the batch for the day - // that we want to analyze could potentially include some late-arriving data from the previous - // day. If so, we want to weed it out. Similarly, if we include data from the following day - // (to scoop up late-arriving events from the day we're analyzing), we need to weed out events - // that fall after the time period we want to analyze. - // [START DocInclude_HTSFilters] - .apply("FilterStartTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) - .apply("FilterEndTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() < stopMinTimestamp.getMillis())) - // [END DocInclude_HTSFilters] - - // [START DocInclude_HTSAddTsAndWindow] - // Add an element timestamp based on the event log, and apply fixed windowing. - .apply("AddEventTimestamps", - WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp()))) - .apply("FixedWindowsTeam", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowDuration())))) - // [END DocInclude_HTSAddTsAndWindow] - - // Extract and sum teamname/score pairs from the event data. 
- .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - .apply("WriteTeamScoreSums", - new WriteToText>( - options.getOutput(), - configureOutput(), - true)); + pipeline + .apply(TextIO.read().from(options.getInput())) + // Parse the incoming data. + .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) + + // Filter out data before and after the given times so that it is not included + // in the calculations. As we collect data in batches (say, by day), the batch for the day + // that we want to analyze could potentially include some late-arriving data from the + // previous day. + // If so, we want to weed it out. Similarly, if we include data from the following day + // (to scoop up late-arriving events from the day we're analyzing), we need to weed out + // events that fall after the time period we want to analyze. + // [START DocInclude_HTSFilters] + .apply( + "FilterStartTime", + Filter.by( + (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) + .apply( + "FilterEndTime", + Filter.by( + (GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis())) + // [END DocInclude_HTSFilters] + + // [START DocInclude_HTSAddTsAndWindow] + // Add an element timestamp based on the event log, and apply fixed windowing. + .apply( + "AddEventTimestamps", + WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp()))) + .apply( + "FixedWindowsTeam", + Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowDuration())))) + // [END DocInclude_HTSAddTsAndWindow] + + // Extract and sum teamname/score pairs from the event data. 
+ .apply("ExtractTeamScore", new ExtractAndSumScore("team")) + .apply( + "WriteTeamScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), true)); pipeline.run().waitUntilFinish(); } diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java similarity index 87% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java index 4f0ee28128..ae32637e15 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java @@ -20,9 +20,9 @@ import com.google.common.annotations.VisibleForTesting; import java.util.HashMap; import java.util.Map; -import java.util.TimeZone; import ${package}.common.ExampleOptions; import ${package}.common.ExampleUtils; +import ${package}.complete.game.utils.GameConstants; import ${package}.complete.game.utils.WriteToBigQuery; import ${package}.complete.game.utils.WriteWindowedToBigQuery; import org.apache.beam.sdk.Pipeline; @@ -45,11 +45,8 @@ import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; -import org.joda.time.DateTimeZone; import org.joda.time.Duration; import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; /** * This class is the third in a series of four pipelines that tell a story in a 'gaming' domain, @@ -92,11 +89,6 @@ */ public class LeaderBoard extends HourlyTeamScore { - private static final String TIMESTAMP_ATTRIBUTE = 
"timestamp_ms"; - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); static final Duration FIVE_MINUTES = Duration.standardMinutes(5); static final Duration TEN_MINUTES = Duration.standardMinutes(10); @@ -140,30 +132,27 @@ interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOpti configureWindowedTableWrite() { Map>> tableConfigure = - new HashMap>>(); + new HashMap<>(); tableConfigure.put( - "team", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.element().getKey())); + "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); tableConfigure.put( "total_score", - new WriteWindowedToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); + new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); tableConfigure.put( "window_start", - new WriteWindowedToBigQuery.FieldInfo>( + new WriteWindowedToBigQuery.FieldInfo<>( "STRING", (c, w) -> { IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); + return GameConstants.DATE_TIME_FORMATTER.print(window.start()); })); tableConfigure.put( "processing_time", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); + new WriteWindowedToBigQuery.FieldInfo<>( + "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); tableConfigure.put( "timing", - new WriteWindowedToBigQuery.FieldInfo>( + new WriteWindowedToBigQuery.FieldInfo<>( "STRING", (c, w) -> c.pane().getTiming().toString())); return tableConfigure; } @@ -175,16 +164,12 @@ interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOpti */ protected static Map>> configureBigQueryWrite() { - Map>> tableConfigure = - new HashMap>>(); + Map>> tableConfigure = new HashMap<>(); tableConfigure.put( - "user", - new WriteToBigQuery.FieldInfo>( - "STRING", 
(c, w) -> c.element().getKey())); + "user", new WriteToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); tableConfigure.put( "total_score", - new WriteToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); + new WriteToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); return tableConfigure; } @@ -200,8 +185,8 @@ interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOpti configureBigQueryWrite(); tableConfigure.put( "processing_time", - new WriteToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); + new WriteToBigQuery.FieldInfo<>( + "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); return tableConfigure; } @@ -218,7 +203,8 @@ public static void main(String[] args) throws Exception { // data elements, and parse the data. PCollection gameEvents = pipeline .apply(PubsubIO.readStrings() - .withTimestampAttribute(TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())) + .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) + .fromTopic(options.getTopic())) .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); gameEvents @@ -230,7 +216,7 @@ public static void main(String[] args) throws Exception { // Write the results to BigQuery. .apply( "WriteTeamScoreSums", - new WriteWindowedToBigQuery>( + new WriteWindowedToBigQuery<>( options.as(GcpOptions.class).getProject(), options.getDataset(), options.getLeaderBoardTableName() + "_team", @@ -242,7 +228,7 @@ public static void main(String[] args) throws Exception { // Write the results to BigQuery. 
.apply( "WriteUserScoreSums", - new WriteToBigQuery>( + new WriteToBigQuery<>( options.as(GcpOptions.class).getProject(), options.getDataset(), options.getLeaderBoardTableName() + "_user", diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java new file mode 100644 index 0000000000..c0a7bc8e17 --- /dev/null +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ${package}.complete.game; + +import static com.google.common.base.MoreObjects.firstNonNull; + +import com.google.common.annotations.VisibleForTesting; +import java.util.HashMap; +import java.util.Map; +import ${package}.common.ExampleUtils; +import ${package}.complete.game.utils.GameConstants; +import ${package}.complete.game.utils.WriteToBigQuery.FieldInfo; +import ${package}.complete.game.utils.WriteWindowedToBigQuery; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; +import org.apache.beam.sdk.options.Default; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.state.StateSpec; +import org.apache.beam.sdk.state.StateSpecs; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.sdk.values.TypeDescriptors; +import org.joda.time.Instant; + +/** + * This class is part of a series of pipelines that tell a story in a gaming domain. Concepts + * include: stateful processing. + * + *

This pipeline processes an unbounded stream of 'game events'. It uses stateful processing to + * aggregate team scores per team and outputs team name and its total score every time the team + * passes a new multiple of a threshold score. For example, multiples of the threshold could be the + * corresponding scores required to pass each level of the game. By default, this threshold is set + * to 5000. + * + *

Stateful processing allows us to write pipelines that output based on a runtime state (when + * a team reaches a certain score, every 100 game events, etc.) without time triggers. See + * https://beam.apache.org/blog/2017/02/13/stateful-processing.html for more information on using + * stateful processing. + * + *

Run {@code injector.Injector} to generate pubsub data for this pipeline. The Injector + * documentation provides more detail on how to do this. + * + *

To execute this pipeline, specify the pipeline configuration like this: + *

{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
+ *   --runner=YOUR_RUNNER
+ *   --dataset=YOUR-DATASET
+ *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
+ * }
+ * 
+ * + *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should be + * the same topic to which the Injector is publishing. + */ +public class StatefulTeamScore extends LeaderBoard { + + /** + * Options supported by {@link StatefulTeamScore}. + */ + interface Options extends LeaderBoard.Options { + + @Description("Numeric value, multiple of which is used as threshold for outputting team score.") + @Default.Integer(5000) + Integer getThresholdScore(); + + void setThresholdScore(Integer value); + } + + /** + * Create a map of information that describes how to write pipeline output to BigQuery. This map + * is used to write team score sums. + */ + private static Map>> configureCompleteWindowedTableWrite() { + + Map>> tableConfigure = + new HashMap<>(); + tableConfigure.put( + "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); + tableConfigure.put( + "total_score", + new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); + tableConfigure.put( + "processing_time", + new WriteWindowedToBigQuery.FieldInfo<>( + "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); + return tableConfigure; + } + + + public static void main(String[] args) throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + // Enforce that this pipeline is always run in streaming mode. + options.setStreaming(true); + ExampleUtils exampleUtils = new ExampleUtils(options); + Pipeline pipeline = Pipeline.create(options); + + pipeline + // Read game events from Pub/Sub using custom timestamps, which are extracted from the + // pubsub data elements, and parse the data. + .apply( + PubsubIO.readStrings() + .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) + .fromTopic(options.getTopic())) + .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) + // Create mapping. UpdateTeamScore uses team name as key. 
+ .apply( + "MapTeamAsKey", + MapElements.into( + TypeDescriptors.kvs( + TypeDescriptors.strings(), TypeDescriptor.of(GameActionInfo.class))) + .via((GameActionInfo gInfo) -> KV.of(gInfo.team, gInfo))) + // Outputs a team's score every time it passes a new multiple of the threshold. + .apply("UpdateTeamScore", ParDo.of(new UpdateTeamScoreFn(options.getThresholdScore()))) + // Write the results to BigQuery. + .apply( + "WriteTeamLeaders", + new WriteWindowedToBigQuery<>( + options.as(GcpOptions.class).getProject(), + options.getDataset(), + options.getLeaderBoardTableName() + "_team_leader", + configureCompleteWindowedTableWrite())); + + // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the + // command line. + PipelineResult result = pipeline.run(); + exampleUtils.waitToFinish(result); + } + + /** + * Tracks each team's score separately in a single state cell and outputs the score every time it + * passes a new multiple of a threshold. + * + *

We use stateful {@link DoFn} because: + *

    + *
  • State is key-partitioned. Therefore, the score is calculated per team.
  • + *
  • Stateful {@link DoFn} can determine when to output based on the state. This only allows + * outputting when a team's score passes a given threshold.
  • + *
+ */ + @VisibleForTesting + public static class UpdateTeamScoreFn + extends DoFn, KV> { + + private static final String TOTAL_SCORE = "totalScore"; + private final int thresholdScore; + + public UpdateTeamScoreFn(int thresholdScore) { + this.thresholdScore = thresholdScore; + } + + /** + * Describes the state for storing team score. Let's break down this statement. + * + * {@link StateSpec} configures the state cell, which is provided by a runner during pipeline + * execution. + * + * {@link org.apache.beam.sdk.transforms.DoFn.StateId} annotation assigns an identifier to the + * state, which is used to refer the state in + * {@link org.apache.beam.sdk.transforms.DoFn.ProcessElement}. + * + *

A {@link ValueState} stores a single value per key and per window. Because our pipeline is + * globally windowed in this example, this {@link ValueState} is just key partitioned, with one + * score per team. Any other class that extends {@link org.apache.beam.sdk.state.State} can be + * used.

+ * + *

In order to store the value, the state must be encoded. Therefore, we provide a coder, in + * this case the {@link VarIntCoder}. If the coder is not provided as in + * {@code StateSpecs.value()}, Beam's coder inference will try to provide a coder automatically. + *

+ */ + @StateId(TOTAL_SCORE) + private final StateSpec> totalScoreSpec = + StateSpecs.value(VarIntCoder.of()); + + /** + * To use a state cell, annotate a parameter with + * {@link org.apache.beam.sdk.transforms.DoFn.StateId} that matches the state declaration. The + * type of the parameter should match the {@link StateSpec} type. + */ + @ProcessElement + public void processElement( + ProcessContext c, + @StateId(TOTAL_SCORE) ValueState totalScore) { + String teamName = c.element().getKey(); + GameActionInfo gInfo = c.element().getValue(); + + // ValueState cells do not contain a default value. If the state is possibly not written, make + // sure to check for null on read. + int oldTotalScore = firstNonNull(totalScore.read(), 0); + totalScore.write(oldTotalScore + gInfo.score); + + // Since there are no negative scores, the easiest way to check whether a team just passed a + // new multiple of the threshold score is to compare the quotients of dividing total scores by + // threshold before and after this aggregation. For example, if the total score was 1999, + // the new total is 2002, and the threshold is 1000, 1999 / 1000 = 1, 2002 / 1000 = 2. + // Therefore, this team passed the threshold. 
+ if (oldTotalScore / this.thresholdScore < totalScore.read() / this.thresholdScore) { + c.output(KV.of(teamName, totalScore.read())); + } + } + } +} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java similarity index 94% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java index c693614c57..f7aa8ff8c0 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java @@ -162,10 +162,11 @@ public PCollection> expand( PCollection gameInfo) { return gameInfo - .apply(MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore()))) - .apply(Sum.integersPerKey()); + .apply( + MapElements.into( + TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) + .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore()))) + .apply(Sum.integersPerKey()); } } // [END DocInclude_USExtractXform] @@ -196,8 +197,7 @@ public interface Options extends PipelineOptions { */ protected static Map>> configureOutput() { - Map>> config = - new HashMap>>(); + Map>> config = new HashMap<>(); config.put("user", (c, w) -> c.element().getKey()); config.put("total_score", (c, w) -> c.element().getValue()); return config; @@ -219,11 +219,7 @@ public static void main(String[] args) throws Exception { // Extract and sum username/score pairs from the event data. 
.apply("ExtractUserScore", new ExtractAndSumScore("user")) .apply( - "WriteUserScoreSums", - new WriteToText>( - options.getOutput(), - configureOutput(), - false)); + "WriteUserScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), false)); // Run the batch pipeline. pipeline.run().waitUntilFinish(); diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java similarity index 89% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java index 980966e0ce..952cb6fc34 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java @@ -30,11 +30,7 @@ import java.util.Arrays; import java.util.List; import java.util.Random; -import java.util.TimeZone; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - +import ${package}.complete.game.utils.GameConstants; /** * This is a generator that simulates usage data from a mobile game, and either publishes the data @@ -86,7 +82,6 @@ class Injector { private static Random random = new Random(); private static String topic; private static String project; - private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; // QPS ranges from 800 to 1000. private static final int MIN_QPS = 800; @@ -96,25 +91,56 @@ class Injector { // Lists used to generate random team names. 
private static final ArrayList COLORS = - new ArrayList(Arrays.asList( - "Magenta", "AliceBlue", "Almond", "Amaranth", "Amber", - "Amethyst", "AndroidGreen", "AntiqueBrass", "Fuchsia", "Ruby", "AppleGreen", - "Apricot", "Aqua", "ArmyGreen", "Asparagus", "Auburn", "Azure", "Banana", - "Beige", "Bisque", "BarnRed", "BattleshipGrey")); + new ArrayList<>( + Arrays.asList( + "Magenta", + "AliceBlue", + "Almond", + "Amaranth", + "Amber", + "Amethyst", + "AndroidGreen", + "AntiqueBrass", + "Fuchsia", + "Ruby", + "AppleGreen", + "Apricot", + "Aqua", + "ArmyGreen", + "Asparagus", + "Auburn", + "Azure", + "Banana", + "Beige", + "Bisque", + "BarnRed", + "BattleshipGrey")); private static final ArrayList ANIMALS = - new ArrayList(Arrays.asList( - "Echidna", "Koala", "Wombat", "Marmot", "Quokka", "Kangaroo", "Dingo", "Numbat", "Emu", - "Wallaby", "CaneToad", "Bilby", "Possum", "Cassowary", "Kookaburra", "Platypus", - "Bandicoot", "Cockatoo", "Antechinus")); + new ArrayList<>( + Arrays.asList( + "Echidna", + "Koala", + "Wombat", + "Marmot", + "Quokka", + "Kangaroo", + "Dingo", + "Numbat", + "Emu", + "Wallaby", + "CaneToad", + "Bilby", + "Possum", + "Cassowary", + "Kookaburra", + "Platypus", + "Bandicoot", + "Cockatoo", + "Antechinus")); // The list of live teams. - private static ArrayList liveTeams = new ArrayList(); - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - + private static ArrayList liveTeams = new ArrayList<>(); // The total number of robots in the system. private static final int NUM_ROBOTS = 20; @@ -275,7 +301,7 @@ private static String addTimeInfoToEvent(String message, Long currTime, int dela String eventTimeString = Long.toString((currTime - delayInMillis) / 1000 * 1000); // Add a (redundant) 'human-readable' date string to make the data semantics more clear. 
- String dateString = fmt.print(currTime); + String dateString = GameConstants.DATE_TIME_FORMATTER.print(currTime); message = message + "," + eventTimeString + "," + dateString; return message; } @@ -294,7 +320,7 @@ public static void publishData(int numMessages, int delayInMillis) PubsubMessage pubsubMessage = new PubsubMessage() .encodeData(message.getBytes("UTF-8")); pubsubMessage.setAttributes( - ImmutableMap.of(TIMESTAMP_ATTRIBUTE, + ImmutableMap.of(GameConstants.TIMESTAMP_ATTRIBUTE, Long.toString((currTime - delayInMillis) / 1000 * 1000))); if (delayInMillis != 0) { System.out.println(pubsubMessage.getAttributes()); @@ -394,16 +420,15 @@ public static void main(String[] args) throws IOException, InterruptedException publishDataToFile(fileName, numMessages, delayInMillis); } else { // Write to PubSub. // Start a thread to inject some data. - new Thread(){ - @Override - public void run() { - try { - publishData(numMessages, delayInMillis); - } catch (IOException e) { - System.err.println(e); - } - } - }.start(); + new Thread( + () -> { + try { + publishData(numMessages, delayInMillis); + } catch (IOException e) { + System.err.println(e); + } + }) + .start(); } // Wait before creating another injector thread. 
diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java similarity index 71% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java index 5d0cc68763..e90fbcc18e 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java @@ -24,11 +24,9 @@ import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler; import com.google.api.client.http.HttpRequest; import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.http.HttpResponse; import com.google.api.client.http.HttpUnsuccessfulResponseHandler; import com.google.api.client.util.ExponentialBackOff; import com.google.api.client.util.Sleeper; -import java.io.IOException; import java.util.logging.Logger; /** @@ -96,32 +94,20 @@ public final 
void initialize(final HttpRequest request) { new ExponentialBackOff()) .setSleeper(sleeper); request.setInterceptor(wrappedCredential); - request.setUnsuccessfulResponseHandler( - new HttpUnsuccessfulResponseHandler() { - @Override - public boolean handleResponse( - final HttpRequest request, - final HttpResponse response, - final boolean supportsRetry) throws IOException { - if (wrappedCredential.handleResponse( - request, response, supportsRetry)) { - // If credential decides it can handle it, - // the return code or message indicated - // something specific to authentication, - // and no backoff is desired. - return true; - } else if (backoffHandler.handleResponse( - request, response, supportsRetry)) { - // Otherwise, we defer to the judgement of - // our internal backoff handler. - LOG.info("Retrying " - + request.getUrl().toString()); - return true; - } else { - return false; - } - } - }); + request.setUnsuccessfulResponseHandler( + (request1, response, supportsRetry) -> { + if (wrappedCredential.handleResponse(request1, response, supportsRetry)) { + // If credential decides it can handle it, the return code or message indicated + // something specific to authentication, and no backoff is desired. + return true; + } else if (backoffHandler.handleResponse(request1, response, supportsRetry)) { + // Otherwise, we defer to the judgement of our internal backoff handler. 
+ LOG.info("Retrying " + request1.getUrl().toString()); + return true; + } else { + return false; + } + }); request.setIOExceptionHandler( new HttpBackOffIOExceptionHandler(new ExponentialBackOff()) .setSleeper(sleeper)); diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java similarity index 55% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java index 90f935c3ce..93da132690 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java @@ -15,23 +15,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package ${package}.common; +package ${package}.complete.game.utils; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; +import java.util.TimeZone; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; /** - * Options that can be used to configure the Beam examples. + * Shared constants between game series classes. 
*/ -public interface ExampleOptions extends PipelineOptions { - @Description("Whether to keep jobs running after local process exit") - @Default.Boolean(false) - boolean getKeepJobsRunning(); - void setKeepJobsRunning(boolean keepJobsRunning); +public class GameConstants { - @Description("Number of workers to use when executing the injector pipeline") - @Default.Integer(1) - int getInjectorNumWorkers(); - void setInjectorNumWorkers(int numWorkers); + public static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; + + public static final DateTimeFormatter DATE_TIME_FORMATTER = + DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); } diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java similarity index 99% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java index dbd5e39977..45135fb059 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java +++ 
b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java @@ -83,7 +83,7 @@ protected class BuildRowFn extends DoFn { @ProcessElement public void processElement(ProcessContext c, BoundedWindow window) { - List fields = new ArrayList(); + List fields = new ArrayList<>(); for (Map.Entry> entry : fieldFn.entrySet()) { String key = entry.getKey(); FieldFn fcn = entry.getValue(); diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java similarity index 61% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java index af347c1c0a..f4c8b160d7 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java @@ -21,11 +21,9 @@ import java.io.IOException; import java.io.Serializable; import java.nio.channels.FileChannel; -import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import 
java.nio.file.StandardOpenOption; import java.util.Arrays; -import java.util.List; import org.apache.beam.sdk.extensions.gcp.options.GcsOptions; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.testing.TestPipeline; @@ -42,15 +40,13 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; /** - * To keep {@link MinimalWordCountJava8} simple, it is not factored or testable. This test + * To keep {@link MinimalWordCount} simple, it is not factored or testable. This test * file should be maintained with a copy of its code for a basic smoke test. */ @RunWith(JUnit4.class) -public class MinimalWordCountJava8Test implements Serializable { +public class MinimalWordCountTest implements Serializable { @Rule public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false); @@ -59,46 +55,39 @@ public class MinimalWordCountJava8Test implements Serializable { * A basic smoke test that ensures there is no crash at pipeline construction time. 
*/ @Test - public void testMinimalWordCountJava8() throws Exception { + public void testMinimalWordCount() throws Exception { p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil()); p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - .apply(FlatMapElements - .into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))) - .apply(Filter.by((String word) -> !word.isEmpty())) - .apply(Count.perElement()) - .apply(MapElements - .into(TypeDescriptors.strings()) - .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) - .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix")); + .apply( + FlatMapElements.into(TypeDescriptors.strings()) + .via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))) + .apply(Filter.by((String word) -> !word.isEmpty())) + .apply(Count.perElement()) + .apply( + MapElements.into(TypeDescriptors.strings()) + .via( + (KV wordCount) -> + wordCount.getKey() + ": " + wordCount.getValue())) + .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix")); } private GcsUtil buildMockGcsUtil() throws IOException { GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class); // Any request to open gets a new bogus channel - Mockito - .when(mockGcsUtil.open(Mockito.any(GcsPath.class))) - .then(new Answer() { - @Override - public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable { - return FileChannel.open( - Files.createTempFile("channel-", ".tmp"), - StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE); - } - }); + Mockito.when(mockGcsUtil.open(Mockito.any(GcsPath.class))) + .then( + invocation -> + FileChannel.open( + Files.createTempFile("channel-", ".tmp"), + StandardOpenOption.CREATE, + StandardOpenOption.DELETE_ON_CLOSE)); // Any request for expansion returns a list containing the original GcsPath // This is required to pass validation that occurs in TextIO during apply() - Mockito - 
.when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) - .then(new Answer>() { - @Override - public List answer(InvocationOnMock invocation) throws Throwable { - return ImmutableList.of((GcsPath) invocation.getArguments()[0]); - } - }); + Mockito.when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) + .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0])); return mockGcsUtil; } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java index b4e4124e26..91a1bf8edc 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java @@ -53,8 +53,7 @@ public void testExtractWordsFn() throws Exception { Assert.assertThat(extractWordsFn.processBundle(" some input words "), CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), - CoreMatchers.hasItems()); + Assert.assertThat(extractWordsFn.processBundle(" "), CoreMatchers.hasItems()); Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), CoreMatchers.hasItems("some", "input", "words")); } diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java 
b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java similarity index 97% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java rename to maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java index 6075c564b7..2478c07fa8 100644 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java @@ -32,7 +32,6 @@ import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testing.TestStream; import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; @@ -240,13 +239,14 @@ public void testTeamScoresObservablyLate() { String redTeam = TestUser.RED_ONE.getTeam(); PAssert.that(teamScores) .inWindow(window) - .satisfies((SerializableFunction>, Void>) input -> { - // The final sums need not exist in the same pane, but must appear in the output - // PCollection - assertThat(input, hasItem(KV.of(blueTeam, 11))); - 
assertThat(input, hasItem(KV.of(redTeam, 27))); - return null; - }); + .satisfies( + input -> { + // The final sums need not exist in the same pane, but must appear in the output + // PCollection + assertThat(input, hasItem(KV.of(blueTeam, 11))); + assertThat(input, hasItem(KV.of(redTeam, 27))); + return null; + }); PAssert.thatMap(teamScores) // The closing behavior of CalculateTeamScores precludes an inFinalPane matcher .inOnTimePane(window) diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java new file mode 100644 index 0000000000..d48b450547 --- /dev/null +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ${package}.complete.game; + +import ${package}.complete.game.StatefulTeamScore.UpdateTeamScoreFn; +import ${package}.complete.game.UserScore.GameActionInfo; +import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.GlobalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for {@link StatefulTeamScore}. + */ +@RunWith(JUnit4.class) +public class StatefulTeamScoreTest { + + private static final Duration ALLOWED_LATENESS = Duration.standardHours(1); + private static final Duration TEAM_WINDOW_DURATION = Duration.standardMinutes(20); + private Instant baseTime = new Instant(0); + + @Rule + public TestPipeline p = TestPipeline.create(); + + /** + * Some example users, on two separate teams. 
+ */ + private enum TestUser { + RED_ONE("scarlet", "red"), RED_TWO("burgundy", "red"), + BLUE_ONE("navy", "blue"), BLUE_TWO("sky", "blue"); + + private final String userName; + private final String teamName; + + TestUser(String userName, String teamName) { + this.userName = userName; + this.teamName = teamName; + } + + public String getUser() { + return userName; + } + + public String getTeam() { + return teamName; + } + } + + /** + * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs + * correctly for one team. + */ + @Test + public void testScoreUpdatesOneTeam() { + + TestStream> createEvents = TestStream.create(KvCoder.of( + StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) + .advanceWatermarkTo(baseTime) + .addElements( + event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)), + event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)), + event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)), + event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)), + event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)) + ) + .advanceWatermarkToInfinity(); + + PCollection> teamScores = p.apply(createEvents) + .apply(ParDo.of(new UpdateTeamScoreFn(100))); + + String redTeam = TestUser.RED_ONE.getTeam(); + + PAssert.that(teamScores) + .inWindow(GlobalWindow.INSTANCE) + .containsInAnyOrder( + KV.of(redTeam, 100), + KV.of(redTeam, 200), + KV.of(redTeam, 401) + ); + + p.run().waitUntilFinish(); + } + + /** + * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs + * correctly for multiple teams. 
+ */ + @Test + public void testScoreUpdatesPerTeam() { + + TestStream> createEvents = TestStream.create(KvCoder.of( + StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) + .advanceWatermarkTo(baseTime) + .addElements( + event(TestUser.RED_ONE, 50, Duration.standardSeconds(10)), + event(TestUser.RED_TWO, 50, Duration.standardSeconds(20)), + event(TestUser.BLUE_ONE, 70, Duration.standardSeconds(30)), + event(TestUser.BLUE_TWO, 80, Duration.standardSeconds(40)), + event(TestUser.BLUE_TWO, 50, Duration.standardSeconds(50)) + ) + .advanceWatermarkToInfinity(); + + PCollection> teamScores = p.apply(createEvents) + .apply(ParDo.of(new UpdateTeamScoreFn(100))); + + String redTeam = TestUser.RED_ONE.getTeam(); + String blueTeam = TestUser.BLUE_ONE.getTeam(); + + PAssert.that(teamScores) + .inWindow(GlobalWindow.INSTANCE) + .containsInAnyOrder( + KV.of(redTeam, 100), + KV.of(blueTeam, 150), + KV.of(blueTeam, 200) + ); + + p.run().waitUntilFinish(); + } + + /** + * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs + * correctly per window and per key. 
+ */ + @Test + public void testScoreUpdatesPerWindow() { + + TestStream> createEvents = TestStream.create(KvCoder.of( + StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) + .advanceWatermarkTo(baseTime) + .addElements( + event(TestUser.RED_ONE, 50, Duration.standardMinutes(1)), + event(TestUser.RED_TWO, 50, Duration.standardMinutes(2)), + event(TestUser.RED_ONE, 50, Duration.standardMinutes(3)), + event(TestUser.RED_ONE, 60, Duration.standardMinutes(6)), + event(TestUser.RED_TWO, 60, Duration.standardMinutes(7)) + ) + .advanceWatermarkToInfinity(); + + Duration teamWindowDuration = Duration.standardMinutes(5); + + PCollection> teamScores = p + .apply(createEvents) + .apply(Window.>into(FixedWindows.of(teamWindowDuration))) + .apply(ParDo.of(new UpdateTeamScoreFn(100))); + + String redTeam = TestUser.RED_ONE.getTeam(); + String blueTeam = TestUser.BLUE_ONE.getTeam(); + + IntervalWindow window1 = new IntervalWindow(baseTime, teamWindowDuration); + IntervalWindow window2 = new IntervalWindow(window1.end(), teamWindowDuration); + + PAssert.that(teamScores) + .inWindow(window1) + .containsInAnyOrder( + KV.of(redTeam, 100) + ); + + PAssert.that(teamScores) + .inWindow(window2) + .containsInAnyOrder( + KV.of(redTeam, 120) + ); + + p.run().waitUntilFinish(); + } + + private TimestampedValue> event( + TestUser user, + int score, + Duration baseTimeOffset) { + return TimestampedValue.of(KV.of(user.getTeam(), new GameActionInfo(user.getUser(), + user.getTeam(), + score, + baseTime.plus(baseTimeOffset).getMillis())), baseTime.plus(baseTimeOffset)); + } +} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java similarity index 100% rename from maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java rename to 
maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties index 8a76657024..b0195b3f16 100644 --- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties +++ b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties @@ -16,4 +16,4 @@ package=it.pkg version=0.1 groupId=archetype.it artifactId=basic -targetPlatform=1.7 +targetPlatform=1.8 diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 7e5eb44cc9..53eeaf01b8 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -33,7 +33,6 @@ starter examples - examples-java8 diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 34bfd076bb..000e743cd2 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -39,7 +39,7 @@ org.apache.maven.archetype archetype-packaging - 2.4 + ${archetype-packaging.version} @@ -55,12 +55,12 @@ maven-archetype-plugin - 2.4 + ${maven-archetype-plugin.version} org.apache.maven.shared maven-invoker - 2.2 + ${maven-invoker.version} diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml index 4c22d5d68b..428c74aa4a 100644 --- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml +++ b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml @@ -21,7 +21,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> - 1.7 + 1.8 diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties index 8a76657024..b0195b3f16 100644 --- 
a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties +++ b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties @@ -16,4 +16,4 @@ package=it.pkg version=0.1 groupId=archetype.it artifactId=basic -targetPlatform=1.7 +targetPlatform=1.8 diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml index 8e4edbd29e..506665830a 100644 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml @@ -51,8 +51,8 @@ maven-compiler-plugin ${maven-compiler-plugin.version} - 1.7 - 1.7 + 1.8 + 1.8 diff --git a/pom.xml b/pom.xml index a3ce24f916..320ac544c3 100644 --- a/pom.xml +++ b/pom.xml @@ -99,18 +99,21 @@ + 1.8 + UTF-8 ${maven.build.timestamp} yyyy-MM-dd HH:mm - 2.2.0 + 2.3.0 Google Cloud Dataflow SDK for Java - ${project.version}-20170517 + ${beam.version} 6 1 v2-rev355-1.22.0 + 6.19 1.22.0 20.0 1.3 @@ -120,11 +123,22 @@ v1-rev10-1.22.0 1.7.25 + 2.4 + 1.6.0 2.20 + 2.4 + 2.17 + 3.0.0 3.6.2 - 1.6.0 + 3.0.1 + 2.2 3.0.2 + 3.0.0-M1 + 2.5.3 + 3.0.2 3.0.0 + 2.20 + 3.0.1 pom @@ -137,47 +151,19 @@ - - org.apache.maven.plugins - maven-enforcer-plugin - 1.4.1 - - - enforce-java - - enforce - - - - - - [1.8.0,) - - - - - - - org.apache.maven.plugins maven-clean-plugin - 3.0.0 + ${maven-clean-plugin.version} org.apache.maven.plugins maven-compiler-plugin - 3.6.2 + ${maven-compiler-plugin.version} - 1.7 - 1.7 + ${java.version} + ${java.version} -Xlint:all -Werror @@ -192,12 +178,12 @@ org.apache.maven.plugins maven-checkstyle-plugin - 2.17 + ${maven-checkstyle-plugin.version} com.puppycrawl.tools checkstyle - 6.19 + ${checkstyle.version} org.apache.beam @@ -232,7 +218,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.0.2 + ${maven-jar-plugin.version} true @@ -255,7 +241,7 @@ org.apache.maven.plugins maven-source-plugin - 
3.0.1 + ${maven-source-plugin.version} attach-sources @@ -277,7 +263,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.4 + ${maven-javadoc-plugin.version} false @@ -295,13 +281,13 @@ org.apache.maven.plugins maven-resources-plugin - 3.0.2 + ${maven-resources-plugin.version} org.apache.maven.plugins maven-dependency-plugin - 3.0.0 + ${maven-dependency-plugin.version} @@ -317,18 +303,18 @@ org.apache.maven.plugins maven-surefire-plugin - 2.20 + ${maven-surefire-plugin.version} org.apache.maven.plugins maven-archetype-plugin - 2.4 + ${maven-archetype-plugin.version} org.apache.maven.shared maven-invoker - 2.2 + ${maven-invoker.version} @@ -354,7 +340,7 @@ org.apache.maven.plugins maven-release-plugin - 2.5.3 + ${maven-release-plugin} true true @@ -365,7 +351,7 @@ org.codehaus.mojo exec-maven-plugin - 1.5.0 + ${exec-maven-plugin.version} false @@ -374,11 +360,6 @@ - - org.apache.maven.plugins - maven-enforcer-plugin - - org.apache.maven.plugins maven-compiler-plugin From 8090e858362899a7a6682b2f46dea882820626b3 Mon Sep 17 00:00:00 2001 From: Batkhuyag Batsaikhan Date: Tue, 27 Feb 2018 18:45:50 -0800 Subject: [PATCH 05/25] removed java8 example --- pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pom.xml b/pom.xml index 320ac544c3..e24580f9e9 100644 --- a/pom.xml +++ b/pom.xml @@ -430,12 +430,6 @@ ${beam.version}
- - org.apache.beam - beam-examples-java8 - ${beam.version} - - junit junit From 5b20661f8ae21f390aa764daf9a3b14da060bce0 Mon Sep 17 00:00:00 2001 From: Batkhuyag Batsaikhan Date: Tue, 27 Feb 2018 19:41:41 -0800 Subject: [PATCH 06/25] [maven-release-plugin] prepare for the current development iteration --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 2 +- sdk/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 75a1d92174..ef6d842c47 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 5ff4872335..477a670dcb 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 53eeaf01b8..9774b06575 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 000e743cd2..3d51805b04 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index e24580f9e9..937f0aa2ff 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT diff --git a/sdk/pom.xml b/sdk/pom.xml index 
33f2255f82..d6ede19a5d 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT + 2.3.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From 0e8ae93463edfc32019a01369e440edec922e91a Mon Sep 17 00:00:00 2001 From: Batkhuyag Batsaikhan Date: Tue, 27 Feb 2018 19:43:58 -0800 Subject: [PATCH 07/25] [maven-release-plugin] prepare branch release-2.3.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 937f0aa2ff..11621568fc 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD + release-2.3.0 From 3f896cb574abf90f1fcad7c46dce0488c8e69cb2 Mon Sep 17 00:00:00 2001 From: Batkhuyag Batsaikhan Date: Tue, 27 Feb 2018 19:43:58 -0800 Subject: [PATCH 08/25] [maven-release-plugin] prepare for next development iteration --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 4 ++-- sdk/pom.xml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index ef6d842c47..14d46dc1ef 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 477a670dcb..13b0d6114d 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 9774b06575..70bd4462cd 100644 --- 
a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 3d51805b04..cb80d97dc8 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 11621568fc..8fa0890f60 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - release-2.3.0 + HEAD diff --git a/sdk/pom.xml b/sdk/pom.xml index d6ede19a5d..c17eacadca 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From 16729af118a52b0fb2a64b85e3962e7689a18518 Mon Sep 17 00:00:00 2001 From: tvalentyn Date: Thu, 22 Mar 2018 16:17:33 -0700 Subject: [PATCH 09/25] Use beam-x.y.z containers for DF SDK releases moving forward. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8fa0890f60..f22890e0b9 100644 --- a/pom.xml +++ b/pom.xml @@ -108,7 +108,7 @@ 2.3.0 Google Cloud Dataflow SDK for Java - ${beam.version} + beam-${beam.version} 6 1 From 3e015a8884604b5e22ccbf2e1fb576a934917720 Mon Sep 17 00:00:00 2001 From: tvalentyn Date: Thu, 22 Mar 2018 16:17:33 -0700 Subject: [PATCH 10/25] Use beam-x.y.z containers for DF SDK releases moving forward. 
--- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8fa0890f60..f22890e0b9 100644 --- a/pom.xml +++ b/pom.xml @@ -108,7 +108,7 @@ 2.3.0 Google Cloud Dataflow SDK for Java - ${beam.version} + beam-${beam.version} 6 1 From b391210d013538d55c5ea85f3b78d640ab031d9e Mon Sep 17 00:00:00 2001 From: akedin Date: Tue, 27 Mar 2018 15:49:43 -0700 Subject: [PATCH 11/25] Upgrade to Apache Beam version 2.4.0 --- maven-archetypes/examples/pom.xml | 28 +++++++++++++++++++ .../resources/archetype-resources/pom.xml | 2 +- .../complete/game/utils/WriteToBigQuery.java | 1 + .../complete/game/StatefulTeamScoreTest.java | 2 -- maven-archetypes/pom.xml | 2 +- pom.xml | 19 +++++++------ 6 files changed, 41 insertions(+), 13 deletions(-) diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 13b0d6114d..cc50502e5e 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -71,6 +71,34 @@
+ + + + org.eclipse.m2e + lifecycle-mapping + ${eclipse-m2e.version} + + + + + + org.codehaus.mojo + exec-maven-plugin + [1.5.0,) + + exec + + + + + false + + + + + + + diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index dcbedafd76..04d70618d9 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -254,7 +254,7 @@ org.mockito - mockito-all + mockito-core ${mockito.version} test diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java index 984e958c50..d35a4ffcfc 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java @@ -91,6 +91,7 @@ FieldFn getFieldFn() { return this.fieldFn; } } + /** Convert each key/score pair into a BigQuery TableRow as specified by fieldFn. 
*/ protected class BuildRowFn extends DoFn { diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java index d48b450547..c80c57f4fc 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java @@ -47,8 +47,6 @@ @RunWith(JUnit4.class) public class StatefulTeamScoreTest { - private static final Duration ALLOWED_LATENESS = Duration.standardHours(1); - private static final Duration TEAM_WINDOW_DURATION = Duration.standardMinutes(20); private Instant baseTime = new Instant(0); @Rule diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 70bd4462cd..1d0500729c 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -69,7 +69,7 @@
- + diff --git a/pom.xml b/pom.xml index f22890e0b9..be1fba9dd6 100644 --- a/pom.xml +++ b/pom.xml @@ -105,15 +105,16 @@ ${maven.build.timestamp} yyyy-MM-dd HH:mm - 2.3.0 + 2.4.0 Google Cloud Dataflow SDK for Java beam-${beam.version} 6 1 - v2-rev355-1.22.0 - 6.19 + v2-rev374-1.22.0 + 8.7 + 1.0.0 1.22.0 20.0 1.3 @@ -125,19 +126,19 @@ 2.4 1.6.0 - 2.20 + 2.20.1 2.4 - 2.17 + 3.0.0 3.0.0 - 3.6.2 - 3.0.1 + 3.7.0 + 3.0.2 2.2 3.0.2 3.0.0-M1 2.5.3 3.0.2 - 3.0.0 - 2.20 + 3.1.0 + 2.20.1 3.0.1 From f75991996c8cb39473519a7138c814e2b62b7919 Mon Sep 17 00:00:00 2001 From: Anton Kedin Date: Wed, 28 Mar 2018 10:13:58 -0700 Subject: [PATCH 12/25] [maven-release-plugin] prepare branch release-2.4.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index be1fba9dd6..621f2bc192 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD + release-2.4.0 From 46b74c852ad97fbfedfc2b3e4d7d46143546d662 Mon Sep 17 00:00:00 2001 From: Anton Kedin Date: Wed, 28 Mar 2018 10:13:58 -0700 Subject: [PATCH 13/25] [maven-release-plugin] prepare for next development iteration --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 4 ++-- sdk/pom.xml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 14d46dc1ef..54b3613909 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index cc50502e5e..2c4a6eb9fe 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ 
-21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 1d0500729c..95f647d608 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index cb80d97dc8..4e91b8de88 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 621f2bc192..4b3108219d 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - release-2.4.0 + HEAD diff --git a/sdk/pom.xml b/sdk/pom.xml index c17eacadca..475af49854 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.4.0-SNAPSHOT + 2.5.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From fb0d662af48359d2d123a4a7ec4b63ce92ede1dd Mon Sep 17 00:00:00 2001 From: Chanseok Oh Date: Wed, 2 May 2018 14:00:07 -0400 Subject: [PATCH 14/25] Fix undefined property bug --- .../starter/src/main/resources/archetype-resources/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml index 22f717f97a..da443b16fa 100644 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml +++ 
b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml @@ -26,7 +26,7 @@ UTF-8 @maven-compiler-plugin.version@ - @maven-exec-plugin.version@ + @exec-maven-plugin.version@ @slf4j.version@ @@ -62,7 +62,7 @@ org.codehaus.mojo exec-maven-plugin - ${maven-exec-plugin.version} + ${exec-maven-plugin.version} false From bf7770d222ac2a9b89633e8287e26e7c84196d17 Mon Sep 17 00:00:00 2001 From: Chanseok Oh Date: Wed, 2 May 2018 14:08:17 -0400 Subject: [PATCH 15/25] Fix test --- .../src/test/resources/projects/basic/reference/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml index 506665830a..daf87595b7 100644 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml @@ -26,7 +26,7 @@ UTF-8 @maven-compiler-plugin.version@ - @maven-exec-plugin.version@ + @exec-maven-plugin.version@ @slf4j.version@ @@ -62,7 +62,7 @@ org.codehaus.mojo exec-maven-plugin - ${maven-exec-plugin.version} + ${exec-maven-plugin.version} false From 568a4ed5eb3c6908a5f882281da98572826d9512 Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 25 Jun 2018 13:34:41 -0700 Subject: [PATCH 16/25] Dataflow SDK Release 2.5.0 --- .../src/main/java/DebuggingWordCount.java | 11 ++++++--- .../src/main/java/WindowedWordCount.java | 13 +++++++---- .../src/main/java/WordCount.java | 21 ++++++++++------- .../main/java/complete/game/GameStats.java | 2 +- .../java/complete/game/HourlyTeamScore.java | 4 ++-- .../main/java/complete/game/LeaderBoard.java | 2 +- .../main/java/complete/game/UserScore.java | 5 ++-- .../java/complete/game/injector/Injector.java | 13 ++++++----- .../complete/game/injector/InjectorUtils.java | 1 + .../complete/game/utils/GameConstants.java | 2 +- .../java/complete/game/utils/WriteToText.java | 4 
++-- .../src/test/java/DebuggingWordCountTest.java | 4 ++-- .../java/complete/game/UserScoreTest.java | 8 +++---- pom.xml | 23 ++++++++++++------- 14 files changed, 69 insertions(+), 44 deletions(-) diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java index 07870f2ed0..0ae31d575d 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java @@ -130,9 +130,7 @@ public interface WordCountOptions extends WordCount.WordCountOptions { void setFilterPattern(String value); } - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); + static void runDebuggingWordCount(WordCountOptions options) { Pipeline p = Pipeline.create(options); PCollection> filteredWords = @@ -159,4 +157,11 @@ public static void main(String[] args) { p.run().waitUntilFinish(); } + + public static void main(String[] args) { + WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() + .as(WordCountOptions.class); + + runDebuggingWordCount(options); + } } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java index 501ac27881..5798f290eb 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java @@ -106,7 +106,7 @@ static class AddTimestampFn extends DoFn { } @ProcessElement - public void processElement(ProcessContext c) { + public 
void processElement(@Element String element, OutputReceiver receiver) { Instant randomTimestamp = new Instant( ThreadLocalRandom.current() @@ -115,7 +115,7 @@ public void processElement(ProcessContext c) { /** * Concept #2: Set the data element with that timestamp. */ - c.outputWithTimestamp(c.element(), new Instant(randomTimestamp)); + receiver.outputWithTimestamp(element, new Instant(randomTimestamp)); } } @@ -165,8 +165,7 @@ public interface Options extends WordCount.WordCountOptions, void setNumShards(Integer numShards); } - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + static void runWindowedWordCount(Options options) throws IOException { final String output = options.getOutput(); final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); @@ -217,4 +216,10 @@ public static void main(String[] args) throws IOException { } } + public static void main(String[] args) throws IOException { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + + runWindowedWordCount(options); + } + } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java index 33f7b39f19..d4302ed67a 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -93,19 +93,19 @@ static class ExtractWordsFn extends DoFn { ExtractWordsFn.class, "lineLenDistro"); @ProcessElement - public void processElement(ProcessContext c) { - lineLenDist.update(c.element().length()); - if (c.element().trim().isEmpty()) { + public void processElement(@Element String element, OutputReceiver 
receiver) { + lineLenDist.update(element.length()); + if (element.trim().isEmpty()) { emptyLines.inc(); } // Split the line into words. - String[] words = c.element().split(ExampleUtils.TOKENIZER_PATTERN); + String[] words = element.split(ExampleUtils.TOKENIZER_PATTERN, -1); // Output each word encountered into the output PCollection. for (String word : words) { if (!word.isEmpty()) { - c.output(word); + receiver.output(word); } } } @@ -172,9 +172,7 @@ public interface WordCountOptions extends PipelineOptions { void setOutput(String value); } - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); + static void runWordCount(WordCountOptions options) { Pipeline p = Pipeline.create(options); // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the @@ -186,4 +184,11 @@ public static void main(String[] args) { p.run().waitUntilFinish(); } + + public static void main(String[] args) { + WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() + .as(WordCountOptions.class); + + runWordCount(options); + } } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java index 3cb04bd2e4..2660cdac2b 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java @@ -153,7 +153,7 @@ public void processElement(ProcessContext c, BoundedWindow window) { /** * Options supported by {@link GameStats}. 
*/ - interface Options extends LeaderBoard.Options { + public interface Options extends LeaderBoard.Options { @Description("Numeric value of fixed window duration for user analysis, in minutes") @Default.Integer(60) Integer getFixedWindowDuration(); diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java index fe1fe99da7..05455219fc 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java @@ -76,13 +76,13 @@ public class HourlyTeamScore extends UserScore { private static DateTimeFormatter minFmt = DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); /** * Options supported by {@link HourlyTeamScore}. */ - interface Options extends UserScore.Options { + public interface Options extends UserScore.Options { @Description("Numeric value of fixed window duration, in minutes") @Default.Integer(60) diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java index ae32637e15..b5983fa789 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java @@ -96,7 +96,7 @@ public class LeaderBoard extends HourlyTeamScore { /** * Options supported by {@link LeaderBoard}. 
*/ - interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOptions { + public interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOptions { @Description("BigQuery Dataset to write tables to. Must already exist.") @Validation.Required diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java index f7aa8ff8c0..3459d043f5 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java @@ -101,7 +101,7 @@ public Integer getScore() { return this.score; } public String getKey(String keyname) { - if (keyname.equals("team")) { + if ("team".equals(keyname)) { return this.team; } else { // return username as default return this.user; @@ -128,7 +128,8 @@ static class ParseEventFn extends DoFn { @ProcessElement public void processElement(ProcessContext c) { - String[] components = c.element().split(","); + System.out.println("GOT " + c.element()); + String[] components = c.element().split(",", -1); try { String user = components[0].trim(); String team = components[1].trim(); diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java index 952cb6fc34..c21ec2e319 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java @@ -90,6 +90,8 @@ class Injector { private static final int THREAD_SLEEP_MS = 500; // Lists used to 
generate random team names. + // If COLORS is changed, please also make changes in + // release/src/main/groovy/MobileGamingCommands.COLORS private static final ArrayList COLORS = new ArrayList<>( Arrays.asList( @@ -349,12 +351,11 @@ public static void publishDataToFile(String fileName, int numMessages, int delay out.println(message); } } catch (Exception e) { + System.err.print("Error in writing generated events to file"); e.printStackTrace(); } finally { - if (out != null) { - out.flush(); - out.close(); - } + out.flush(); + out.close(); } } @@ -371,7 +372,7 @@ public static void main(String[] args) throws IOException, InterruptedException String fileName = args[2]; // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if // specified; otherwise, it will try to write to a file. - if (topicName.equalsIgnoreCase("none")) { + if ("none".equalsIgnoreCase(topicName)) { writeToFile = true; writeToPubsub = false; } @@ -383,7 +384,7 @@ public static void main(String[] args) throws IOException, InterruptedException InjectorUtils.createTopic(pubsub, topic); System.out.println("Injecting to topic: " + topic); } else { - if (fileName.equalsIgnoreCase("none")) { + if ("none".equalsIgnoreCase(fileName)) { System.out.println("Filename not specified."); System.exit(1); } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java index ddcbff4f41..5a0cf0166e 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java @@ -86,6 +86,7 @@ public static String getFullyQualifiedTopicName( */ public static void createTopic(Pubsub client, String fullTopicName) 
throws IOException { + System.out.println("fullTopicName " + fullTopicName); try { client.projects().topics().get(fullTopicName).execute(); } catch (GoogleJsonResponseException e) { diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java index 93da132690..dc28ad72ea 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java @@ -31,5 +31,5 @@ public class GameConstants { public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java index 45135fb059..76fa3ff075 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java @@ -52,7 +52,7 @@ public class WriteToText private static final DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); protected String filenamePrefix; protected Map> fieldFn; @@ -98,7 +98,7 @@ public void processElement(ProcessContext c, BoundedWindow window) 
{ * A {@link DoFn} that writes elements to files with names deterministically derived from the * lower and upper bounds of their key (an {@link IntervalWindow}). */ - protected class WriteOneFilePerWindow extends PTransform, PDone> { + protected static class WriteOneFilePerWindow extends PTransform, PDone> { private final String filenamePrefix; diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java index 26e1498d71..0fbee20cb5 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java @@ -37,7 +37,7 @@ public class DebuggingWordCountTest { private String getFilePath(String filePath) { if (filePath.contains(":")) { - return filePath.replace("\\", "/").split(":")[1]; + return filePath.replace("\\", "/").split(":", -1)[1]; } return filePath; } @@ -54,6 +54,6 @@ public void testDebuggingWordCount() throws Exception { TestPipeline.testingPipelineOptions().as(WordCountOptions.class); options.setInputFile(getFilePath(inputFile.getAbsolutePath())); options.setOutput(getFilePath(outputFile.getAbsolutePath())); - DebuggingWordCount.main(TestPipeline.convertToArgs(options)); + DebuggingWordCount.runDebuggingWordCount(options); } } diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java index 83b8821480..b691a0cbd5 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java @@ 
-91,10 +91,10 @@ public void testParseEventFn() throws Exception { DoFnTester.of(new ParseEventFn()); List results = parseEventFn.processBundle(GAME_EVENTS_ARRAY); - Assert.assertEquals(results.size(), 8); - Assert.assertEquals(results.get(0).getUser(), "user0_MagentaKangaroo"); - Assert.assertEquals(results.get(0).getTeam(), "MagentaKangaroo"); - Assert.assertEquals(results.get(0).getScore(), new Integer(3)); + Assert.assertEquals(8, results.size()); + Assert.assertEquals("user0_MagentaKangaroo", results.get(0).getUser()); + Assert.assertEquals("MagentaKangaroo", results.get(0).getTeam()); + Assert.assertEquals(Integer.valueOf(3), results.get(0).getScore()); } /** Tests ExtractAndSumScore("user"). */ diff --git a/pom.xml b/pom.xml index 4b3108219d..6cf1ef6b6c 100644 --- a/pom.xml +++ b/pom.xml @@ -105,23 +105,24 @@ ${maven.build.timestamp} yyyy-MM-dd HH:mm - 2.4.0 + 2.5.0 Google Cloud Dataflow SDK for Java beam-${beam.version} 6 1 - v2-rev374-1.22.0 + v2-rev374-1.23.0 8.7 1.0.0 - 1.22.0 + 1.23.0 20.0 1.3 2.4 4.12 + 1.0.0 1.9.5 - v1-rev10-1.22.0 + v1-rev382-1.23.0 1.7.25 2.4 @@ -129,16 +130,16 @@ 2.20.1 2.4 3.0.0 - 3.0.0 + 3.1.0 3.7.0 - 3.0.2 + 3.1.1 2.2 3.0.2 3.0.0-M1 2.5.3 - 3.0.2 + 3.1.0 3.1.0 - 2.20.1 + 2.21.0 3.0.1 @@ -431,6 +432,12 @@ ${beam.version} + + org.apache.beam + beam-sdks-java-io-kafka + ${beam.version} + + junit junit From 0e6e1039a276ed183320e51228944daa5542b697 Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 25 Jun 2018 14:25:39 -0700 Subject: [PATCH 17/25] [maven-release-plugin] prepare branch release-2.5.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6cf1ef6b6c..43b148a3fe 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD + release-2.5.0 From eef52e96134085ecbf618b247308b9be0b567789 Mon Sep 17 
00:00:00 2001 From: Pablo Date: Mon, 25 Jun 2018 14:25:39 -0700 Subject: [PATCH 18/25] [maven-release-plugin] prepare for next development iteration --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 4 ++-- sdk/pom.xml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 54b3613909..468d87505e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 2c4a6eb9fe..792eb40c92 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 95f647d608..f995770ea6 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 4e91b8de88..643cfa4096 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 43b148a3fe..2924ff9fa2 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - 
release-2.5.0 + HEAD diff --git a/sdk/pom.xml b/sdk/pom.xml index 475af49854..21e2a2df0f 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From 422c8eb2b6e0959627af33375ddcdfe8c98cab8c Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 26 Jun 2018 10:13:30 -0700 Subject: [PATCH 19/25] Adding Kafka IO to Dataflow SDK dependencies --- sdk/pom.xml | 5 +++++ .../java/com/google/cloud/dataflow/sdk/SdkDependencies.java | 2 ++ 2 files changed, 7 insertions(+) diff --git a/sdk/pom.xml b/sdk/pom.xml index 21e2a2df0f..0bd69dc58c 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -61,6 +61,11 @@ beam-runners-google-cloud-dataflow-java + + org.apache.beam + beam-sdks-java-io-kafka + + junit junit diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java index 7bbfbe3729..df3fd76ae6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java @@ -19,6 +19,7 @@ import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; +import org.apache.beam.sdk.io.kafka.KafkaIO; /** * Mark the dependencies as used at compile time. @@ -26,6 +27,7 @@ class SdkDependencies { private Pipeline p; private BigQueryIO bigQueryIO; + private KafkaIO kafkaIO; private DirectRunner directRunner; private DataflowRunner dataflowRunner; } From 0968379b3d63e3a3e181d7229cba2e8a2b6c3290 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 26 Jun 2018 11:15:47 -0700 Subject: [PATCH 20/25] Revert "[maven-release-plugin] prepare for next development iteration" This reverts commit eef52e96134085ecbf618b247308b9be0b567789. 
--- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 4 ++-- sdk/pom.xml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 468d87505e..54b3613909 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 792eb40c92..2c4a6eb9fe 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index f995770ea6..95f647d608 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 643cfa4096..4e91b8de88 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 2924ff9fa2..43b148a3fe 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD + release-2.5.0 diff --git a/sdk/pom.xml b/sdk/pom.xml index 0bd69dc58c..d54342364c 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 
@@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT + 2.5.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From 279fda33114999da2d9158a51a79acdc36a4108c Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 26 Jun 2018 11:17:54 -0700 Subject: [PATCH 21/25] Revert "[maven-release-plugin] prepare branch release-2.5.0" This reverts commit 0e6e1039a276ed183320e51228944daa5542b697. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 43b148a3fe..6cf1ef6b6c 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - release-2.5.0 + HEAD From e598e911b554e0150853aefff06a510fdbc1fe69 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 26 Jun 2018 13:04:23 -0700 Subject: [PATCH 22/25] [maven-release-plugin] prepare branch release-2.5.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6cf1ef6b6c..43b148a3fe 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD + release-2.5.0 From 0668997c78639ee05d41a2c1ba79899ebba37649 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 26 Jun 2018 13:04:23 -0700 Subject: [PATCH 23/25] [maven-release-plugin] prepare for next development iteration --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 4 ++-- sdk/pom.xml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 54b3613909..468d87505e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow 
google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT google-cloud-dataflow-java-examples-all diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 2c4a6eb9fe..792eb40c92 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml index 95f647d608..f995770ea6 100644 --- a/maven-archetypes/pom.xml +++ b/maven-archetypes/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 4e91b8de88..643cfa4096 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-archetypes-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 43b148a3fe..2924ff9fa2 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ http://cloud.google.com/dataflow 2013 - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT @@ -54,7 +54,7 @@ scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - release-2.5.0 + HEAD diff --git a/sdk/pom.xml b/sdk/pom.xml index d54342364c..0bd69dc58c 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -20,7 +20,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT google-cloud-dataflow-java-sdk-all From 913d720638a37f5ea150cde5734c731192e5ab16 Mon Sep 17 00:00:00 2001 From: Ahmet Altay Date: Wed, 25 Jul 2018 10:40:55 -0700 Subject: [PATCH 24/25] Clean up, and point to Beam locations. 
--- .gitattributes | 40 -- .gitignore | 30 -- .travis.yml | 54 --- CONTRIBUTING.md | 51 -- LICENSE | 202 -------- NOTICE | 5 - examples/pom.xml | 46 -- .../dataflow/sdk/ExamplesDependencies.java | 29 -- maven-archetypes/examples/pom.xml | 108 ----- .../META-INF/maven/archetype-metadata.xml | 44 -- .../examples/src/main/resources/NOTICE | 5 - .../resources/archetype-resources/pom.xml | 262 ---------- .../src/main/java/DebuggingWordCount.java | 167 ------- .../src/main/java/MinimalWordCount.java | 119 ----- .../src/main/java/WindowedWordCount.java | 225 --------- .../src/main/java/WordCount.java | 194 -------- .../common/ExampleBigQueryTableOptions.java | 55 --- .../src/main/java/common/ExampleOptions.java | 37 -- ...mplePubsubTopicAndSubscriptionOptions.java | 45 -- .../common/ExamplePubsubTopicOptions.java | 45 -- .../src/main/java/common/ExampleUtils.java | 407 ---------------- .../java/common/WriteOneFilePerWindow.java | 117 ----- .../main/java/complete/game/GameStats.java | 346 -------------- .../java/complete/game/HourlyTeamScore.java | 182 ------- .../main/java/complete/game/LeaderBoard.java | 306 ------------ .../java/complete/game/StatefulTeamScore.java | 227 --------- .../main/java/complete/game/UserScore.java | 229 --------- .../java/complete/game/injector/Injector.java | 439 ----------------- .../complete/game/injector/InjectorUtils.java | 101 ---- .../injector/RetryHttpInitializerWrapper.java | 115 ----- .../complete/game/utils/GameConstants.java | 35 -- .../complete/game/utils/WriteToBigQuery.java | 145 ------ .../java/complete/game/utils/WriteToText.java | 183 ------- .../game/utils/WriteWindowedToBigQuery.java | 71 --- .../src/test/java/DebuggingWordCountTest.java | 59 --- .../src/test/java/MinimalWordCountTest.java | 94 ---- .../src/test/java/WordCountTest.java | 85 ---- .../java/complete/game/GameStatsTest.java | 81 ---- .../complete/game/HourlyTeamScoreTest.java | 116 ----- .../java/complete/game/LeaderBoardTest.java | 368 -------------- 
.../complete/game/StatefulTeamScoreTest.java | 206 -------- .../java/complete/game/UserScoreTest.java | 154 ------ .../projects/basic/archetype.properties | 19 - .../test/resources/projects/basic/goal.txt | 1 - maven-archetypes/pom.xml | 92 ---- maven-archetypes/starter/pom.xml | 93 ---- .../META-INF/maven/archetype-metadata.xml | 36 -- .../starter/src/main/resources/NOTICE | 5 - .../resources/archetype-resources/pom.xml | 93 ---- .../src/main/java/StarterPipeline.java | 69 --- .../projects/basic/archetype.properties | 19 - .../test/resources/projects/basic/goal.txt | 1 - .../projects/basic/reference/pom.xml | 93 ---- .../src/main/java/it/pkg/StarterPipeline.java | 69 --- pom.xml | 449 ------------------ sdk/pom.xml | 75 --- .../cloud/dataflow/sdk/SdkDependencies.java | 33 -- .../dataflow/dataflow-distribution.properties | 20 - .../DataflowRunnerInfoOverrideTest.java | 57 --- sdk/suppressions.xml | 30 -- 60 files changed, 7083 deletions(-) delete mode 100644 .gitattributes delete mode 100644 .gitignore delete mode 100644 .travis.yml delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE delete mode 100644 NOTICE delete mode 100644 examples/pom.xml delete mode 100644 examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java delete mode 100644 maven-archetypes/examples/pom.xml delete mode 100644 maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml delete mode 100644 maven-archetypes/examples/src/main/resources/NOTICE delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java delete mode 100644 
maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java delete mode 100644 
maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java delete mode 100644 maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java delete mode 100644 maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties delete mode 100644 maven-archetypes/examples/src/test/resources/projects/basic/goal.txt delete mode 100644 maven-archetypes/pom.xml delete mode 100644 maven-archetypes/starter/pom.xml delete mode 100644 maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml delete mode 100644 
maven-archetypes/starter/src/main/resources/NOTICE delete mode 100644 maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml delete mode 100644 maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java delete mode 100644 maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties delete mode 100644 maven-archetypes/starter/src/test/resources/projects/basic/goal.txt delete mode 100644 maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml delete mode 100644 maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java delete mode 100644 pom.xml delete mode 100644 sdk/pom.xml delete mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java delete mode 100644 sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties delete mode 100644 sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java delete mode 100644 sdk/suppressions.xml diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index c39158cf00..0000000000 --- a/.gitattributes +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# The default behavior, which overrides 'core.autocrlf', is to use Git's -# built-in heuristics to determine whether a particular file is text or binary. 
-# Text files are automatically normalized to the user's platforms. -* text=auto - -# Explicitly declare text files that should always be normalized and converted -# to native line endings. -.gitattributes text -.gitignore text -LICENSE text -*.avsc text -*.html text -*.java text -*.md text -*.properties text -*.proto text -*.py text -*.sh text -*.xml text -*.yml text - -# Declare files that will always have CRLF line endings on checkout. -# *.sln text eol=crlf - -# Explicitly denote all files that are truly binary and should not be modified. -# *.jpg binary diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 2a27023c28..0000000000 --- a/.gitignore +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -target/ - -# Ignore IntelliJ files. -.idea/ -*.iml -*.ipr -*.iws - -# Ignore Eclipse files. -.classpath -.project -.settings/ - -# The build process generates the dependency-reduced POM, but it shouldn't be -# committed. -dependency-reduced-pom.xml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8fa5d9a932..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -language: java - -sudo: false - -notifications: - email: - # Group email notifications are disabled for now, since we cannot do it on a per-branch basis. - # Right now, it would trigger a notification for each fork, which generates a lot of spam. - # recipients: - # - dataflow-sdk-build-notifications+travis@google.com - on_success: change - on_failure: always - -matrix: - include: - # On OSX, run with default JDK only. - - os: osx - # On Linux, run with specific JDKs only. - - os: linux - env: CUSTOM_JDK="oraclejdk8" - # The distribution does not build with Java 7 by design. We need to rewrite these tests - # to, for example, build and install with Java 8 and then test examples with Java 7. - # - os: linux - # env: CUSTOM_JDK="oraclejdk7" - # - os: linux - # env: CUSTOM_JDK="openjdk7" - -before_install: - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi - -install: - - travis_retry mvn install clean -U -DskipTests=true - -script: - # Verify that the project can be built and installed. - - mvn install - # Verify that starter and examples archetypes have the correct version of the NOTICE file. - - diff -q NOTICE maven-archetypes/starter/src/main/resources/NOTICE - - diff -q NOTICE maven-archetypes/examples/src/main/resources/NOTICE diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 9b616e5fe3..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,51 +0,0 @@ - - -Want to contribute? Great! 
First, read this page (including the small print at -the end). - -Google Cloud Dataflow SDK is a distribution of Apache Beam. If you'd like to -change anything under the `org.apache.beam.*` namespace, please submit that -change directly to the [Apache Beam](https://github.com/apache/beam) project. - -This repository contains code to build the Dataflow distribution of Beam, and -some Dataflow-specific code. Only changes to how the distribution is built, or -the Dataflow-specific code under the `com.google.cloud.dataflow.*` namespace, -can be merged here. - -### Before you contribute -Before we can use your code, you must sign the -[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) -(CLA), which you can do online. The CLA is necessary mainly because you own the -copyright to your changes, even after your contribution becomes part of our -codebase, so we need your permission to use and distribute your code. We also -need to be sure of various other things. For instance that you'll tell us if you -know that your code infringes on other people's patents. You don't have to sign -the CLA until after you've submitted your code for review and a member has -approved it, but you must do it before we can put your code into our codebase. - -Before you start working on a larger contribution, we recommend to get in touch -with us first through the issue tracker with your idea so that we can help out -and possibly guide you. Coordinating up front makes it much easier to avoid -frustration later on. - -### Code reviews -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. - -### The small print -Contributions made by corporations are covered by a different agreement than -the one above, the Software Grant and Corporate Contributor License Agreement. 
diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d645695673..0000000000 --- a/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/NOTICE b/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/examples/pom.xml b/examples/pom.xml deleted file mode 100644 index 468d87505e..0000000000 --- a/examples/pom.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT - - - google-cloud-dataflow-java-examples-all - Google Cloud Dataflow Java Examples - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. 
This artifact includes all Dataflow Java SDK - examples. - - jar - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - - - - org.apache.beam - beam-examples-java - - - diff --git a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java deleted file mode 100644 index c51e527edb..0000000000 --- a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.examples.MinimalWordCount; -import org.apache.beam.examples.WordCount; - -/** - * Mark the examples dependencies as used at compile time. This is also needed - * to produce some content in the final JAR file. 
- */ -class ExamplesDependencies { - SdkDependencies sdkDependencies; - WordCount wordCount; - MinimalWordCount minimalWordCount; -} diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml deleted file mode 100644 index 792eb40c92..0000000000 --- a/maven-archetypes/examples/pom.xml +++ /dev/null @@ -1,108 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.6.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-examples - Google Cloud Dataflow SDK for Java - Examples Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a project containing all the example - pipelines. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - ${archetype-packaging.version} - - - - - - - maven-archetype-plugin - ${maven-archetype-plugin.version} - - - org.apache.maven.shared - maven-invoker - ${maven-invoker.version} - - - - - - default-integration-test - install - - integration-test - - - - - - - - org.eclipse.m2e - lifecycle-mapping - ${eclipse-m2e.version} - - - - - - org.codehaus.mojo - exec-maven-plugin - [1.5.0,) - - exec - - - - - false - - - - - - - - - - - - - diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 29f8605cce..0000000000 --- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - 1.8 - - - - - - src/main/java - - **/*.java - - - - - src/test/java - - **/*.java - - - - diff --git a/maven-archetypes/examples/src/main/resources/NOTICE b/maven-archetypes/examples/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- 
a/maven-archetypes/examples/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 04d70618d9..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,262 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - jar - - - UTF-8 - - @bigquery.version@ - @google-clients.version@ - @guava.version@ - @hamcrest.version@ - @joda.version@ - @junit.version@ - @maven-compiler-plugin.version@ - @exec-maven-plugin.version@ - @maven-jar-plugin.version@ - @maven-shade-plugin.version@ - @mockito.version@ - @pubsub.version@ - @slf4j.version@ - @surefire-plugin.version@ - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - ${targetPlatform} - ${targetPlatform} - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${surefire-plugin.version} - - all - 4 - true - - - - org.apache.maven.surefire - surefire-junit47 - ${surefire-plugin.version} - - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven-jar-plugin.version} - - - - - org.apache.maven.plugins - maven-shade-plugin - ${maven-shade-plugin.version} - - - package - - shade - - - ${project.artifactId}-bundled-${project.version} - - - *:* - - META-INF/LICENSE - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-maven-plugin.version} - - false - - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - 
@project.version@ - - - - - com.google.api-client - google-api-client - ${google-clients.version} - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-bigquery - ${bigquery.version} - - - - com.google.guava - guava-jdk5 - - - - - - com.google.http-client - google-http-client - ${google-clients.version} - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-pubsub - ${pubsub.version} - - - - com.google.guava - guava-jdk5 - - - - - - joda-time - joda-time - ${joda.version} - - - - com.google.guava - guava - ${guava.version} - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - org.slf4j - slf4j-jdk14 - ${slf4j.version} - - runtime - - - - - org.hamcrest - hamcrest-all - ${hamcrest.version} - - - - junit - junit - ${junit.version} - - - - org.mockito - mockito-core - ${mockito.version} - test - - - diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java deleted file mode 100644 index 0ae31d575d..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * An example that verifies word counts in Shakespeare and includes Beam best practices. - * - *

This class, {@link DebuggingWordCount}, is the third in a series of four successively more - * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount} - * and {@link WordCount}. After you've looked at this example, then see the - * {@link WindowedWordCount} pipeline, for introduction of additional concepts. - * - *

Basic concepts, also in the MinimalWordCount and WordCount examples: - * Reading text files; counting a PCollection; executing a Pipeline both locally - * and using a selected runner; defining DoFns. - * - *

New Concepts: - *

- *   1. Logging using SLF4J, even in a distributed environment
- *   2. Creating a custom metric (runners have varying levels of support)
- *   3. Testing your Pipeline via PAssert
- * 
- * - *

To execute this pipeline locally, specify general pipeline configuration: - *

{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * 
- * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - */ -public class DebuggingWordCount { - /** A DoFn that filters for a specific key based upon a regular expression. */ - public static class FilterTextFn extends DoFn, KV> { - /** - * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the - * logger. Depending on your SLF4J configuration, log statements will likely be qualified by - * this name. - * - *

Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J - * configuration that is most appropriate for their logging integration. - */ - private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class); - - private final Pattern filter; - public FilterTextFn(String pattern) { - filter = Pattern.compile(pattern); - } - - /** - * Concept #2: A custom metric can track values in your pipeline as it runs. Each - * runner provides varying levels of support for metrics, and may expose them - * in a dashboard, etc. - */ - private final Counter matchedWords = Metrics.counter(FilterTextFn.class, "matchedWords"); - private final Counter unmatchedWords = Metrics.counter(FilterTextFn.class, "unmatchedWords"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (filter.matcher(c.element().getKey()).matches()) { - // Log at the "DEBUG" level each element that we match. When executing this pipeline - // these log lines will appear only if the log level is set to "DEBUG" or lower. - LOG.debug("Matched: " + c.element().getKey()); - matchedWords.inc(); - c.output(c.element()); - } else { - // Log at the "TRACE" level each element that is not matched. Different log levels - // can be used to control the verbosity of logging providing an effective mechanism - // to filter less important information. - LOG.trace("Did not match: " + c.element().getKey()); - unmatchedWords.inc(); - } - } - } - - /** - * Options supported by {@link DebuggingWordCount}. - * - *

Inherits standard configuration options and all options defined in - * {@link WordCount.WordCountOptions}. - */ - public interface WordCountOptions extends WordCount.WordCountOptions { - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - } - - static void runDebuggingWordCount(WordCountOptions options) { - Pipeline p = Pipeline.create(options); - - PCollection> filteredWords = - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); - - /** - * Concept #3: PAssert is a set of convenient PTransforms in the style of - * Hamcrest's collection matchers that can be used when writing Pipeline level tests - * to validate the contents of PCollections. PAssert is best used in unit tests - * with small data sets but is demonstrated here as a teaching tool. - * - *

Below we verify that the set of filtered words matches our expected counts. Note - * that PAssert does not provide any output and that successful completion of the - * Pipeline implies that the expectations were met. Learn more at - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ on how to test - * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test. - */ - List> expectedResults = Arrays.asList( - KV.of("Flourish", 3L), - KV.of("stomach", 1L)); - PAssert.that(filteredWords).containsInAnyOrder(expectedResults); - - p.run().waitUntilFinish(); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - - runDebuggingWordCount(options); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java deleted file mode 100644 index f1bd8bfaa8..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.FlatMapElements; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptors; - -/** - * An example that counts words in Shakespeare. - * - *

This class, {@link MinimalWordCount}, is the first in a series of four successively more - * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or - * argument processing, and focus on construction of the pipeline, which chains together the - * application of core transforms. - * - *

Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the - * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional - * concepts. - * - *

Concepts: - * - *

- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting items in a PCollection
- *   4. Writing data to text files
- * 
- * - *

No arguments are required to run this pipeline. It will be executed with the DirectRunner. You - * can see the results in the output files in your current working directory, with names like - * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate - * file service. - */ -public class MinimalWordCount { - - public static void main(String[] args) { - - // Create a PipelineOptions object. This object lets us set various execution - // options for our pipeline, such as the runner you wish to use. This example - // will run with the DirectRunner by default, based on the class path configured - // in its dependencies. - PipelineOptions options = PipelineOptionsFactory.create(); - - // In order to run your pipeline, you need to make following runner specific changes: - // - // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner - // or FlinkRunner. - // CHANGE 2/3: Specify runner-required options. - // For BlockingDataflowRunner, set project and temp location as follows: - // DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); - // dataflowOptions.setRunner(BlockingDataflowRunner.class); - // dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE"); - // dataflowOptions.setTempLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY"); - // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions} - // for more details. - // options.as(FlinkPipelineOptions.class) - // .setRunner(FlinkRunner.class); - - // Create the Pipeline object with the options we defined above - Pipeline p = Pipeline.create(options); - - // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set - // of input text files. TextIO.Read returns a PCollection where each element is one line from - // the input text (a set of Shakespeare's texts). - - // This example reads a public data set consisting of the complete works of Shakespeare. 
- p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - - // Concept #2: Apply a FlatMapElements transform the PCollection of text lines. - // This transform splits the lines in PCollection, where each element is an - // individual word in Shakespeare's collected texts. - .apply(FlatMapElements - .into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^\\p{L}]+")))) - // We use a Filter transform to avoid empty word - .apply(Filter.by((String word) -> !word.isEmpty())) - // Concept #3: Apply the Count transform to our PCollection of individual words. The Count - // transform returns a new PCollection of key/value pairs, where each key represents a - // unique word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - // Apply a MapElements transform that formats our PCollection of word counts into a - // printable string, suitable for writing to an output file. - .apply(MapElements - .into(TypeDescriptors.strings()) - .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. 
- // - // By default, it will write to a set of files with names like wordcounts-00001-of-00005 - .apply(TextIO.write().to("wordcounts")); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java deleted file mode 100644 index 5798f290eb..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.io.IOException; -import java.util.concurrent.ThreadLocalRandom; -import ${package}.common.ExampleBigQueryTableOptions; -import ${package}.common.ExampleOptions; -import ${package}.common.WriteOneFilePerWindow; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * An example that counts words in text, and can run over either unbounded or bounded input - * collections. - * - *

This class, {@link WindowedWordCount}, is the last in a series of four successively more - * detailed 'word count' examples. First take a look at {@link MinimalWordCount}, - * {@link WordCount}, and {@link DebuggingWordCount}. - * - *

Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples: - * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally - * and using a selected runner; defining DoFns; - * user-defined PTransforms; defining PipelineOptions. - * - *

New Concepts: - *

- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. Windowing
- *   4. Re-using PTransforms over windowed PCollections
- *   5. Accessing the window of an element
- *   6. Writing data to per-window text files
- * 
- * - *

By default, the examples will run with the {@code DirectRunner}. - * To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * See examples/java/README.md for instructions about how to configure different runners. - * - *

To execute this pipeline locally, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - *

By default, the pipeline will do fixed windowing, on 1-minute windows. You can - * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10} - * for 10-minute windows. - * - *

The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C). - */ -public class WindowedWordCount { - static final int WINDOW_SIZE = 10; // Default window duration in minutes - /** - * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for - * this example, for the bounded data case. - * - *

Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate - * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a - * 2-hour period. - */ - static class AddTimestampFn extends DoFn { - private final Instant minTimestamp; - private final Instant maxTimestamp; - - AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) { - this.minTimestamp = minTimestamp; - this.maxTimestamp = maxTimestamp; - } - - @ProcessElement - public void processElement(@Element String element, OutputReceiver receiver) { - Instant randomTimestamp = - new Instant( - ThreadLocalRandom.current() - .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis())); - - /** - * Concept #2: Set the data element with that timestamp. - */ - receiver.outputWithTimestamp(element, new Instant(randomTimestamp)); - } - } - - /** A {@link DefaultValueFactory} that returns the current system time. */ - public static class DefaultToCurrentSystemTime implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return System.currentTimeMillis(); - } - } - - /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */ - public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return options.as(Options.class).getMinTimestampMillis() - + Duration.standardHours(1).getMillis(); - } - } - - /** - * Options for {@link WindowedWordCount}. - * - *

Inherits standard example configuration options, which allow specification of the - * runner, as well as the {@link WordCount.WordCountOptions} support for - * specification of the input and output files. - */ - public interface Options extends WordCount.WordCountOptions, - ExampleOptions, ExampleBigQueryTableOptions { - @Description("Fixed window duration, in minutes") - @Default.Integer(WINDOW_SIZE) - Integer getWindowSize(); - void setWindowSize(Integer value); - - @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToCurrentSystemTime.class) - Long getMinTimestampMillis(); - void setMinTimestampMillis(Long value); - - @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class) - Long getMaxTimestampMillis(); - void setMaxTimestampMillis(Long value); - - @Description("Fixed number of shards to produce per window") - Integer getNumShards(); - void setNumShards(Integer numShards); - } - - static void runWindowedWordCount(Options options) throws IOException { - final String output = options.getOutput(); - final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); - final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); - - Pipeline pipeline = Pipeline.create(options); - - /** - * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or - * unbounded input source. - */ - PCollection input = pipeline - /** Read from the GCS file. */ - .apply(TextIO.read().from(options.getInputFile())) - // Concept #2: Add an element timestamp, using an artificial time just to show windowing. - // See AddTimestampFn for more detail on this. - .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp))); - - /** - * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1 - * minute (you can change this with a command-line option). 
See the documentation for more - * information on how fixed windows work, and for information on the other types of windowing - * available (e.g., sliding windows). - */ - PCollection windowedWords = - input.apply( - Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); - - /** - * Concept #4: Re-use our existing CountWords transform that does not have knowledge of - * windows over a PCollection containing windowed values. - */ - PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords()); - - /** - * Concept #5: Format the results and write to a sharded file partitioned by window, using a - * simple ParDo operation. Because there may be failures followed by retries, the - * writes must be idempotent, but the details of writing to files is elided here. - */ - wordCounts - .apply(MapElements.via(new WordCount.FormatAsTextFn())) - .apply(new WriteOneFilePerWindow(output, options.getNumShards())); - - PipelineResult result = pipeline.run(); - try { - result.waitUntilFinish(); - } catch (Exception exc) { - result.cancel(); - } - } - - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - - runWindowedWordCount(options); - } - -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java deleted file mode 100644 index d4302ed67a..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Distribution; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation.Required; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * An example that counts words in Shakespeare and includes Beam best practices. - * - *

This class, {@link WordCount}, is the second in a series of four successively more detailed - * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}. - * After you've looked at this example, then see the {@link DebuggingWordCount} - * pipeline, for introduction of additional concepts. - * - *

For a detailed walkthrough of this example, see - * - * https://beam.apache.org/get-started/wordcount-example/ - * - * - *

Basic concepts, also in the MinimalWordCount example: - * Reading text files; counting a PCollection; writing to text files - * - *

New Concepts: - *

- *   1. Executing a Pipeline both locally and using the selected runner
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * 
- * - *

Concept #1: you can execute this pipeline either locally or using by selecting another runner. - * These are now command-line options and not hard-coded as they were in the MinimalWordCount - * example. - * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

To execute this pipeline, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - */ -public class WordCount { - - /** - * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns - * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it - * to a ParDo in the pipeline. - */ - static class ExtractWordsFn extends DoFn { - private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines"); - private final Distribution lineLenDist = Metrics.distribution( - ExtractWordsFn.class, "lineLenDistro"); - - @ProcessElement - public void processElement(@Element String element, OutputReceiver receiver) { - lineLenDist.update(element.length()); - if (element.trim().isEmpty()) { - emptyLines.inc(); - } - - // Split the line into words. - String[] words = element.split(ExampleUtils.TOKENIZER_PATTERN, -1); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - receiver.output(word); - } - } - } - } - - /** A SimpleFunction that converts a Word and Count into a printable string. */ - public static class FormatAsTextFn extends SimpleFunction, String> { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - } - - /** - * A PTransform that converts a PCollection containing lines of text into a PCollection of - * formatted word counts. - * - *

Concept #3: This is a custom composite transform that bundles two transforms (ParDo and - * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse, - * modular testing, and an improved monitoring experience. - */ - public static class CountWords extends PTransform, - PCollection>> { - @Override - public PCollection> expand(PCollection lines) { - - // Convert lines of text into individual words. - PCollection words = lines.apply( - ParDo.of(new ExtractWordsFn())); - - // Count the number of times each word occurs. - PCollection> wordCounts = words.apply(Count.perElement()); - - return wordCounts; - } - } - - /** - * Options supported by {@link WordCount}. - * - *

Concept #4: Defining your own configuration options. Here, you can add your own arguments - * to be processed by the command-line parser, and specify default values for them. You can then - * access the options values in your pipeline code. - * - *

Inherits standard configuration options. - */ - public interface WordCountOptions extends PipelineOptions { - - /** - * By default, this example reads from a public dataset containing the text of - * King Lear. Set this option to choose a different input file or glob. - */ - @Description("Path of the file to read from") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInputFile(); - void setInputFile(String value); - - /** - * Set this required option to specify where to write the output. - */ - @Description("Path of the file to write to") - @Required - String getOutput(); - void setOutput(String value); - } - - static void runWordCount(WordCountOptions options) { - Pipeline p = Pipeline.create(options); - - // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the - // static FormatAsTextFn() to the ParDo transform. - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())) - .apply("WriteCounts", TextIO.write().to(options.getOutput())); - - p.run().waitUntilFinish(); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - - runWordCount(options); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java deleted file mode 100644 index 57f1546e27..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.services.bigquery.model.TableSchema; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure BigQuery tables in Beam examples. - * The project defaults to the project being used to run the example. - */ -public interface ExampleBigQueryTableOptions extends GcpOptions { - @Description("BigQuery dataset name") - @Default.String("beam_examples") - String getBigQueryDataset(); - void setBigQueryDataset(String dataset); - - @Description("BigQuery table name") - @Default.InstanceFactory(BigQueryTableFactory.class) - String getBigQueryTable(); - void setBigQueryTable(String table); - - @Description("BigQuery table schema") - TableSchema getBigQuerySchema(); - void setBigQuerySchema(TableSchema schema); - - /** - * Returns the job name as the default BigQuery table name. 
- */ - class BigQueryTableFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return options.getJobName().replace('-', '_'); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java deleted file mode 100644 index 90f935c3ce..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure the Beam examples. 
- */ -public interface ExampleOptions extends PipelineOptions { - @Description("Whether to keep jobs running after local process exit") - @Default.Boolean(false) - boolean getKeepJobsRunning(); - void setKeepJobsRunning(boolean keepJobsRunning); - - @Description("Number of workers to use when executing the injector pipeline") - @Default.Integer(1) - int getInjectorNumWorkers(); - void setInjectorNumWorkers(int numWorkers); -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java deleted file mode 100644 index cf142a10fd..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic/subscription in Beam examples. - */ -public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions { - @Description("Pub/Sub subscription") - @Default.InstanceFactory(PubsubSubscriptionFactory.class) - String getPubsubSubscription(); - void setPubsubSubscription(String subscription); - - /** - * Returns a default Pub/Sub subscription based on the project and the job names. - */ - class PubsubSubscriptionFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/subscriptions/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java deleted file mode 100644 index 86784b06da..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic in Beam examples. - */ -public interface ExamplePubsubTopicOptions extends GcpOptions { - @Description("Pub/Sub topic") - @Default.InstanceFactory(PubsubTopicFactory.class) - String getPubsubTopic(); - void setPubsubTopic(String topic); - - /** - * Returns a default Pub/Sub topic based on the project and the job names. - */ - class PubsubTopicFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/topics/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java deleted file mode 100644 index e1159b9018..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.Bigquery.Datasets; -import com.google.api.services.bigquery.Bigquery.Tables; -import com.google.api.services.bigquery.model.Dataset; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.Subscription; -import com.google.api.services.pubsub.model.Topic; -import com.google.auth.Credentials; -import com.google.auth.http.HttpCredentialsAdapter; -import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.PipelineResult; 
-import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.RetryHttpRequestInitializer; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.util.Transport; -import org.joda.time.Duration; - -/** - * The utility class that sets up and tears down external resources, - * and cancels the streaming pipelines once the program terminates. - * - *

It is used to run Beam examples. - */ -public class ExampleUtils { - - private static final int SC_NOT_FOUND = 404; - - /** - * \p{L} denotes the category of Unicode letters, - * so this pattern will match on everything that is not a letter. - * - *

It is used for tokenizing strings in the wordcount examples. - */ - public static final String TOKENIZER_PATTERN = "[^\\p{L}]+"; - - private final PipelineOptions options; - private Bigquery bigQueryClient = null; - private Pubsub pubsubClient = null; - private Set pipelinesToCancel = Sets.newHashSet(); - private List pendingMessages = Lists.newArrayList(); - - /** - * Do resources and runner options setup. - */ - public ExampleUtils(PipelineOptions options) { - this.options = options; - } - - /** - * Sets up external resources that are required by the example, - * such as Pub/Sub topics and BigQuery tables. - * - * @throws IOException if there is a problem setting up the resources - */ - public void setup() throws IOException { - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backOff = - FluentBackoff.DEFAULT - .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff(); - Throwable lastException = null; - try { - do { - try { - setupPubsub(); - setupBigQueryTable(); - return; - } catch (GoogleJsonResponseException e) { - lastException = e; - } - } while (BackOffUtils.next(sleeper, backOff)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // Ignore InterruptedException - } - throw new RuntimeException(lastException); - } - - /** - * Sets up the Google Cloud Pub/Sub topic. - * - *

If the topic doesn't exist, a new topic with the given name will be created. - * - * @throws IOException if there is a problem setting up the Pub/Sub topic - */ - public void setupPubsub() throws IOException { - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - pendingMessages.add("**********************Set Up Pubsub************************"); - setupPubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been set up for this example: " - + pubsubOptions.getPubsubTopic()); - - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - setupPubsubSubscription( - pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been set up for this example: " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - /** - * Sets up the BigQuery table with the given schema. - * - *

If the table already exists, the schema has to match the given one. Otherwise, the example - * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema - * will be created. - * - * @throws IOException if there is a problem setting up the BigQuery table - */ - public void setupBigQueryTable() throws IOException { - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("******************Set Up Big Query Table*******************"); - setupBigQueryTable(bigQueryTableOptions.getProject(), - bigQueryTableOptions.getBigQueryDataset(), - bigQueryTableOptions.getBigQueryTable(), - bigQueryTableOptions.getBigQuerySchema()); - pendingMessages.add("The BigQuery table has been set up for this example: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - } - } - - /** - * Tears down external resources that can be deleted upon the example's completion. 
- */ - private void tearDown() { - pendingMessages.add("*************************Tear Down*************************"); - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - try { - deletePubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been deleted: " - + pubsubOptions.getPubsubTopic()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub topic : " - + pubsubOptions.getPubsubTopic()); - } - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - try { - deletePubsubSubscription(pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been deleted: " - + pubsubOptions.getPubsubSubscription()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub subscription : " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("The BigQuery table might contain the example's output, " - + "and it is not deleted automatically: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - pendingMessages.add("Please go to the Developers Console to delete it manually." - + " Otherwise, you may be charged for its usage."); - } - } - - /** - * Returns a BigQuery client builder using the specified {@link BigQueryOptions}. 
- */ - private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { - return new Bigquery.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - /** - * Returns a Pubsub client builder using the specified {@link PubsubOptions}. - */ - private static Pubsub.Builder newPubsubClient(PubsubOptions options) { - return new Pubsub.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setRootUrl(options.getPubsubRootUrl()) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - private static HttpRequestInitializer chainHttpRequestInitializer( - Credentials credential, HttpRequestInitializer httpRequestInitializer) { - if (credential == null) { - return new ChainingHttpRequestInitializer( - new NullCredentialInitializer(), httpRequestInitializer); - } else { - return new ChainingHttpRequestInitializer( - new HttpCredentialsAdapter(credential), - httpRequestInitializer); - } - } - - private void setupBigQueryTable(String projectId, String datasetId, String tableId, - TableSchema schema) throws IOException { - if (bigQueryClient == null) { - bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build(); - } - - Datasets datasetService = bigQueryClient.datasets(); - if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) { - Dataset newDataset = new Dataset().setDatasetReference( - new 
DatasetReference().setProjectId(projectId).setDatasetId(datasetId)); - datasetService.insert(projectId, newDataset).execute(); - } - - Tables tableService = bigQueryClient.tables(); - Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId)); - if (table == null) { - Table newTable = new Table().setSchema(schema).setTableReference( - new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId)); - tableService.insert(projectId, datasetId, newTable).execute(); - } else if (!table.getSchema().equals(schema)) { - throw new RuntimeException( - "Table exists and schemas do not match, expecting: " + schema.toPrettyString() - + ", actual: " + table.getSchema().toPrettyString()); - } - } - - private void setupPubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) { - pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute(); - } - } - - private void setupPubsubSubscription(String topic, String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) { - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); - pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub topic. 
- * - * @throws IOException if there is a problem deleting the Pub/Sub topic - */ - private void deletePubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) { - pubsubClient.projects().topics().delete(topic).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub subscription. - * - * @throws IOException if there is a problem deleting the Pub/Sub subscription - */ - private void deletePubsubSubscription(String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) { - pubsubClient.projects().subscriptions().delete(subscription).execute(); - } - } - - /** - * Waits for the pipeline to finish and cancels it before the program exists. - */ - public void waitToFinish(PipelineResult result) { - pipelinesToCancel.add(result); - if (!options.as(ExampleOptions.class).getKeepJobsRunning()) { - addShutdownHook(pipelinesToCancel); - } - try { - result.waitUntilFinish(); - } catch (UnsupportedOperationException e) { - // Do nothing if the given PipelineResult doesn't support waitUntilFinish(), - // such as EvaluationResults returned by DirectRunner. 
- tearDown(); - printPendingMessages(); - } catch (Exception e) { - throw new RuntimeException("Failed to wait the pipeline until finish: " + result); - } - } - - private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } - - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println( - "Failed to verify the cancellation for job: " + pipelineResult); - } - } - })); - } - - private void printPendingMessages() { - System.out.println(); - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - for (String message : pendingMessages) { - System.out.println(message); - } - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - } - - private static T executeNullIfNotFound( - AbstractGoogleClientRequest request) throws IOException { - try { - return request.execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == SC_NOT_FOUND) { - return null; - } else { - throw e; - } - } - } -} diff --git 
a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java deleted file mode 100644 index 9796d647b5..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import static com.google.common.base.MoreObjects.firstNonNull; - -import javax.annotation.Nullable; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; - -/** - * A {@link DoFn} that writes elements to files with names deterministically derived from the lower - * and upper bounds of their key (an {@link IntervalWindow}). - * - *

This is test utility code, not for end-users, so examples can be focused on their primary - * lessons. - */ -public class WriteOneFilePerWindow extends PTransform, PDone> { - private static final DateTimeFormatter FORMATTER = ISODateTimeFormat.hourMinute(); - private String filenamePrefix; - @Nullable - private Integer numShards; - - public WriteOneFilePerWindow(String filenamePrefix, Integer numShards) { - this.filenamePrefix = filenamePrefix; - this.numShards = numShards; - } - - @Override - public PDone expand(PCollection input) { - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - TextIO.Write write = - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites(); - if (numShards != null) { - write = write.withNumShards(numShards); - } - return input.apply(write); - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - public static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId baseFilename; - - public PerWindowFiles(ResourceId baseFilename) { - this.baseFilename = baseFilename; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String prefix = - baseFilename.isDirectory() ? 
"" : firstNonNull(baseFilename.getFilename(), ""); - return String.format("%s-%s-%s", - prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return baseFilename - .getCurrentDirectory() - .resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java deleted file mode 100644 index 2660cdac2b..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import ${package}.common.ExampleUtils; -import ${package}.complete.game.utils.GameConstants; -import ${package}.complete.game.utils.WriteWindowedToBigQuery; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.Mean; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.Values; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Sessions; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; 
-import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is the fourth in a series of four pipelines that tell a story in a 'gaming' - * domain, following {@link UserScore}, {@link HourlyTeamScore}, and {@link LeaderBoard}. - * New concepts: session windows and finding session duration; use of both - * singleton and non-singleton side inputs. - * - *

This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business - * intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user - * score sum for a window, and uses that information to identify likely spammers/robots. (The robots - * have a higher click rate than the human users). The 'robot' users are then filtered out when - * calculating the team scores. - * - *

Additionally, user sessions are tracked: that is, we find bursts of user activity using - * session windows. Then, the mean session duration information is recorded in the context of - * subsequent fixed windowing. (This could be used to tell us what games are giving us greater - * user retention). - * - *

Run {@code org.apache.beam.examples.complete.game.injector.Injector} to generate - * pubsub data for this pipeline. The {@code Injector} documentation provides more detail. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * 
- * - *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should - * be the same topic to which the Injector is publishing. - */ -public class GameStats extends LeaderBoard { - - /** - * Filter out all users but those with a high clickrate, which we will consider as 'spammy' users. - * We do this by finding the mean total score per user, then using that information as a side - * input to filter out all but those user scores that are larger than - * {@code (mean * SCORE_WEIGHT)}. - */ - // [START DocInclude_AbuseDetect] - public static class CalculateSpammyUsers - extends PTransform>, PCollection>> { - private static final Logger LOG = LoggerFactory.getLogger(CalculateSpammyUsers.class); - private static final double SCORE_WEIGHT = 2.5; - - @Override - public PCollection> expand(PCollection> userScores) { - - // Get the sum of scores for each user. - PCollection> sumScores = - userScores.apply("UserSum", Sum.integersPerKey()); - - // Extract the score from each element, and use it to find the global mean. - final PCollectionView globalMeanScore = - sumScores.apply(Values.create()).apply(Mean.globally().asSingletonView()); - - // Filter the user sums using the global mean. - PCollection> filtered = sumScores - .apply("ProcessAndFilter", ParDo - // use the derived mean total score as a side input - .of(new DoFn, KV>() { - private final Counter numSpammerUsers = Metrics.counter("main", "SpammerUsers"); - @ProcessElement - public void processElement(ProcessContext c) { - Integer score = c.element().getValue(); - Double gmc = c.sideInput(globalMeanScore); - if (score > (gmc * SCORE_WEIGHT)) { - LOG.info("user " + c.element().getKey() + " spammer score " + score - + " with mean " + gmc); - numSpammerUsers.inc(); - c.output(c.element()); - } - } - }).withSideInputs(globalMeanScore)); - return filtered; - } - } - // [END DocInclude_AbuseDetect] - - /** - * Calculate and output an element's session duration. 
- */ - private static class UserSessionInfoFn extends DoFn, Integer> { - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - IntervalWindow w = (IntervalWindow) window; - int duration = new Duration( - w.start(), w.end()).toPeriod().toStandardMinutes().getMinutes(); - c.output(duration); - } - } - - - /** - * Options supported by {@link GameStats}. - */ - public interface Options extends LeaderBoard.Options { - @Description("Numeric value of fixed window duration for user analysis, in minutes") - @Default.Integer(60) - Integer getFixedWindowDuration(); - void setFixedWindowDuration(Integer value); - - @Description("Numeric value of gap between user sessions, in minutes") - @Default.Integer(5) - Integer getSessionGap(); - void setSessionGap(Integer value); - - @Description("Numeric value of fixed window for finding mean of user session duration, " - + "in minutes") - @Default.Integer(30) - Integer getUserActivityWindowDuration(); - void setUserActivityWindowDuration(Integer value); - - @Description("Prefix used for the BigQuery table names") - @Default.String("game_stats") - String getGameStatsTablePrefix(); - void setGameStatsTablePrefix(String value); - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write information about team score sums. 
- */ - protected static Map>> - configureWindowedWrite() { - Map>> tableConfigure = - new HashMap<>(); - tableConfigure.put( - "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return GameConstants.DATE_TIME_FORMATTER.print(window.start()); - })); - tableConfigure.put( - "processing_time", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); - return tableConfigure; - } - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write information about mean user session time. - */ - protected static Map> - configureSessionWindowWrite() { - - Map> tableConfigure = new HashMap<>(); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return GameConstants.DATE_TIME_FORMATTER.print(window.start()); - })); - tableConfigure.put( - "mean_duration", new WriteWindowedToBigQuery.FieldInfo<>("FLOAT", (c, w) -> c.element())); - return tableConfigure; - } - - - - public static void main(String[] args) throws Exception { - - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - // Enforce that this pipeline is always run in streaming mode. 
- options.setStreaming(true); - ExampleUtils exampleUtils = new ExampleUtils(options); - Pipeline pipeline = Pipeline.create(options); - - // Read Events from Pub/Sub using custom timestamps - PCollection rawEvents = pipeline - .apply(PubsubIO.readStrings() - .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) - .fromTopic(options.getTopic())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); - - // Extract username/score pairs from the event stream - PCollection> userEvents = - rawEvents.apply("ExtractUserScore", - MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - // Calculate the total score per user over fixed windows, and - // cumulative updates for late data. - final PCollectionView> spammersView = - userEvents - .apply( - "FixedWindowsUser", - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - - // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate. - // These might be robots/spammers. - .apply("CalculateSpammyUsers", new CalculateSpammyUsers()) - // Derive a view from the collection of spammer users. It will be used as a side input - // in calculating the team score sums, below. - .apply("CreateSpammersView", View.asMap()); - - // [START DocInclude_FilterAndCalc] - // Calculate the total score per team over fixed windows, - // and emit cumulative updates for late data. Uses the side input derived above-- the set of - // suspected robots-- to filter out scores from those users from the sum. - // Write the results to BigQuery. - rawEvents - .apply( - "WindowIntoFixedWindows", - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - // Filter out the detected spammer users, using the side input derived above. 
- .apply( - "FilterOutSpammers", - ParDo.of( - new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - // If the user is not in the spammers Map, output the data element. - if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) { - c.output(c.element()); - } - } - }) - .withSideInputs(spammersView)) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - // [END DocInclude_FilterAndCalc] - // Write the result to BigQuery - .apply( - "WriteTeamSums", - new WriteWindowedToBigQuery<>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_team", - configureWindowedWrite())); - - // [START DocInclude_SessionCalc] - // Detect user sessions-- that is, a burst of activity separated by a gap from further - // activity. Find and record the mean session lengths. - // This information could help the game designers track the changing user engagement - // as their set of games changes. - userEvents - .apply( - "WindowIntoSessions", - Window.>into( - Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))) - .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)) - // For this use, we care only about the existence of the session, not any particular - // information aggregated over it, so the following is an efficient way to do that. - .apply(Combine.perKey(x -> 0)) - // Get the duration per session. - .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())) - // [END DocInclude_SessionCalc] - // [START DocInclude_Rewindow] - // Re-window to process groups of session sums according to when the sessions complete. - .apply( - "WindowToExtractSessionMean", - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))) - // Find the mean session duration in each window. 
- .apply(Mean.globally().withoutDefaults()) - // Write this info to a BigQuery table. - .apply( - "WriteAvgSessionLength", - new WriteWindowedToBigQuery<>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_sessions", - configureSessionWindowWrite())); - // [END DocInclude_Rewindow] - - - // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the - // command line. - PipelineResult result = pipeline.run(); - exampleUtils.waitToFinish(result); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java deleted file mode 100644 index 05455219fc..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import ${package}.complete.game.utils.GameConstants; -import ${package}.complete.game.utils.WriteToText; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.WithTimestamps; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.joda.time.DateTimeZone; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * This class is the second in a series of four pipelines that tell a story in a 'gaming' - * domain, following {@link UserScore}. In addition to the concepts introduced in {@link UserScore}, - * new concepts include: windowing and element timestamps; use of {@code Filter.by()}. - * - *

This pipeline processes data collected from gaming events in batch, building on {@link - * UserScore} but using fixed windows. It calculates the sum of scores per team, for each window, - * optionally allowing specification of two timestamps before and after which data is filtered out. - * This allows a model where late data collected after the intended analysis window can be included, - * and any late-arriving data prior to the beginning of the analysis window can be removed as well. - * By using windowing and adding element timestamps, we can do finer-grained analysis than with the - * {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get - * results from plays at the beginning of the batch's time period until the batch is processed. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --tempLocation=YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --output=YOUR_OUTPUT_DIRECTORY
- *   (possibly options specific to your runner or permissions for your temp/output locations)
- * }
- * 
- * - *

Optionally include {@code --input} to specify the batch input file path. - * To indicate a time after which the data should be filtered out, include the - * {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data - * timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis. - * To indicate a time before which data should be filtered out, include the {@code --startMin} arg. - * If you're using the default input specified in {@link UserScore}, - * "gs://apache-beam-samples/game/gaming_data*.csv", then - * {@code --startMin=2015-11-16-16-10 --stopMin=2015-11-17-16-10} are good values. - */ -public class HourlyTeamScore extends UserScore { - - private static DateTimeFormatter minFmt = - DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); - - - /** - * Options supported by {@link HourlyTeamScore}. - */ - public interface Options extends UserScore.Options { - - @Description("Numeric value of fixed window duration, in minutes") - @Default.Integer(60) - Integer getWindowDuration(); - void setWindowDuration(Integer value); - - @Description("String representation of the first minute after which to generate results," - + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." - + "Any input data timestamped prior to that minute won't be included in the sums.") - @Default.String("1970-01-01-00-00") - String getStartMin(); - void setStartMin(String value); - - @Description("String representation of the first minute for which to not generate results," - + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." - + "Any input data timestamped after that minute won't be included in the sums.") - @Default.String("2100-01-01-00-00") - String getStopMin(); - void setStopMin(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to text. 
This map - * is passed to the {@link WriteToText} constructor to write team score sums and - * includes information about window start time. - */ - protected static Map>> - configureOutput() { - Map>> config = new HashMap<>(); - config.put("team", (c, w) -> c.element().getKey()); - config.put("total_score", (c, w) -> c.element().getValue()); - config.put( - "window_start", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return GameConstants.DATE_TIME_FORMATTER.print(window.start()); - }); - return config; - } - - - /** - * Run a batch pipeline to do windowed analysis of the data. - */ - // [START DocInclude_HTSMain] - public static void main(String[] args) throws Exception { - // Begin constructing a pipeline configured by commandline flags. - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - Pipeline pipeline = Pipeline.create(options); - - final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin())); - final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin())); - - // Read 'gaming' events from a text file. - pipeline - .apply(TextIO.read().from(options.getInput())) - // Parse the incoming data. - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - - // Filter out data before and after the given times so that it is not included - // in the calculations. As we collect data in batches (say, by day), the batch for the day - // that we want to analyze could potentially include some late-arriving data from the - // previous day. - // If so, we want to weed it out. Similarly, if we include data from the following day - // (to scoop up late-arriving events from the day we're analyzing), we need to weed out - // events that fall after the time period we want to analyze. 
- // [START DocInclude_HTSFilters] - .apply( - "FilterStartTime", - Filter.by( - (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) - .apply( - "FilterEndTime", - Filter.by( - (GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis())) - // [END DocInclude_HTSFilters] - - // [START DocInclude_HTSAddTsAndWindow] - // Add an element timestamp based on the event log, and apply fixed windowing. - .apply( - "AddEventTimestamps", - WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp()))) - .apply( - "FixedWindowsTeam", - Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowDuration())))) - // [END DocInclude_HTSAddTsAndWindow] - - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - .apply( - "WriteTeamScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), true)); - - pipeline.run().waitUntilFinish(); - } - // [END DocInclude_HTSMain] - -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java deleted file mode 100644 index b5983fa789..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import com.google.common.annotations.VisibleForTesting; -import java.util.HashMap; -import java.util.Map; -import ${package}.common.ExampleOptions; -import ${package}.common.ExampleUtils; -import ${package}.complete.game.utils.GameConstants; -import ${package}.complete.game.utils.WriteToBigQuery; -import ${package}.complete.game.utils.WriteWindowedToBigQuery; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.StreamingOptions; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; -import org.apache.beam.sdk.transforms.windowing.AfterWatermark; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.GlobalWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Repeatedly; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * This class is the 
third in a series of four pipelines that tell a story in a 'gaming' domain, - * following {@link UserScore} and {@link HourlyTeamScore}. Concepts include: processing unbounded - * data using fixed windows; use of custom timestamps and event-time processing; generation of - * early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late- - * arriving data. - * - *

This pipeline processes an unbounded stream of 'game events'. The calculation of the team - * scores uses fixed windowing based on event time (the time of the game play event), not - * processing time (the time that an event is processed by the pipeline). The pipeline calculates - * the sum of scores per team, for each window. By default, the team scores are calculated using - * one-hour windows. - * - *

In contrast-- to demo another windowing option-- the user scores are calculated using a - * global window, which periodically (every ten minutes) emits cumulative user score sums. - * - *

In contrast to the previous pipelines in the series, which used static, finite input data, - * here we're using an unbounded data source, which lets us provide speculative results, and allows - * handling of late data, at much lower latency. We can use the early/speculative results to keep a - * 'leaderboard' updated in near-realtime. Our handling of late data lets us generate correct - * results, e.g. for 'team prizes'. We're now outputting window results as they're - * calculated, giving us much lower latency than with the previous batch examples. - * - *

Run {@code injector.Injector} to generate pubsub data for this pipeline. The Injector - * documentation provides more detail on how to do this. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * 
- * - *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should be - * the same topic to which the Injector is publishing. - */ -public class LeaderBoard extends HourlyTeamScore { - - static final Duration FIVE_MINUTES = Duration.standardMinutes(5); - static final Duration TEN_MINUTES = Duration.standardMinutes(10); - - - /** - * Options supported by {@link LeaderBoard}. - */ - public interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOptions { - - @Description("BigQuery Dataset to write tables to. Must already exist.") - @Validation.Required - String getDataset(); - void setDataset(String value); - - @Description("Pub/Sub topic to read from") - @Validation.Required - String getTopic(); - void setTopic(String value); - - @Description("Numeric value of fixed window duration for team analysis, in minutes") - @Default.Integer(60) - Integer getTeamWindowDuration(); - void setTeamWindowDuration(Integer value); - - @Description("Numeric value of allowed data lateness, in minutes") - @Default.Integer(120) - Integer getAllowedLateness(); - void setAllowedLateness(Integer value); - - @Description("Prefix used for the BigQuery table names") - @Default.String("leaderboard") - String getLeaderBoardTableName(); - void setLeaderBoardTableName(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write team score sums and includes event timing information. 
- */ - protected static Map>> - configureWindowedTableWrite() { - - Map>> tableConfigure = - new HashMap<>(); - tableConfigure.put( - "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return GameConstants.DATE_TIME_FORMATTER.print(window.start()); - })); - tableConfigure.put( - "processing_time", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); - tableConfigure.put( - "timing", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", (c, w) -> c.pane().getTiming().toString())); - return tableConfigure; - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is passed to the {@link WriteToBigQuery} constructor to write user score sums. - */ - protected static Map>> - configureBigQueryWrite() { - Map>> tableConfigure = new HashMap<>(); - tableConfigure.put( - "user", new WriteToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); - return tableConfigure; - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write user score sums. 
- */ - protected static Map>> - configureGlobalWindowBigQueryWrite() { - - Map>> tableConfigure = - configureBigQueryWrite(); - tableConfigure.put( - "processing_time", - new WriteToBigQuery.FieldInfo<>( - "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); - return tableConfigure; - } - - - public static void main(String[] args) throws Exception { - - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - // Enforce that this pipeline is always run in streaming mode. - options.setStreaming(true); - ExampleUtils exampleUtils = new ExampleUtils(options); - Pipeline pipeline = Pipeline.create(options); - - // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub - // data elements, and parse the data. - PCollection gameEvents = pipeline - .apply(PubsubIO.readStrings() - .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) - .fromTopic(options.getTopic())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); - - gameEvents - .apply( - "CalculateTeamScores", - new CalculateTeamScores( - Duration.standardMinutes(options.getTeamWindowDuration()), - Duration.standardMinutes(options.getAllowedLateness()))) - // Write the results to BigQuery. - .apply( - "WriteTeamScoreSums", - new WriteWindowedToBigQuery<>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getLeaderBoardTableName() + "_team", - configureWindowedTableWrite())); - gameEvents - .apply( - "CalculateUserScores", - new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness()))) - // Write the results to BigQuery. - .apply( - "WriteUserScoreSums", - new WriteToBigQuery<>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getLeaderBoardTableName() + "_user", - configureGlobalWindowBigQueryWrite())); - - // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the - // command line. 
- PipelineResult result = pipeline.run(); - exampleUtils.waitToFinish(result); - } - - /** - * Calculates scores for each team within the configured window duration. - */ - // [START DocInclude_WindowAndTrigger] - // Extract team/score pairs from the event stream, using hour-long windows by default. - @VisibleForTesting - static class CalculateTeamScores - extends PTransform, PCollection>> { - private final Duration teamWindowDuration; - private final Duration allowedLateness; - - CalculateTeamScores(Duration teamWindowDuration, Duration allowedLateness) { - this.teamWindowDuration = teamWindowDuration; - this.allowedLateness = allowedLateness; - } - - @Override - public PCollection> expand(PCollection infos) { - return infos.apply("LeaderboardTeamFixedWindows", - Window.into(FixedWindows.of(teamWindowDuration)) - // We will get early (speculative) results as well as cumulative - // processing of late data. - .triggering(AfterWatermark.pastEndOfWindow() - .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(FIVE_MINUTES)) - .withLateFirings(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(TEN_MINUTES))) - .withAllowedLateness(allowedLateness) - .accumulatingFiredPanes()) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")); - } - } - // [END DocInclude_WindowAndTrigger] - - // [START DocInclude_ProcTimeTrigger] - /** - * Extract user/score pairs from the event stream using processing time, via global windowing. - * Get periodic updates on all users' running scores. 
- */ - @VisibleForTesting - static class CalculateUserScores - extends PTransform, PCollection>> { - private final Duration allowedLateness; - - CalculateUserScores(Duration allowedLateness) { - this.allowedLateness = allowedLateness; - } - - @Override - public PCollection> expand(PCollection input) { - return input.apply("LeaderboardUserGlobalWindow", - Window.into(new GlobalWindows()) - // Get periodic results every ten minutes. - .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(TEN_MINUTES))) - .accumulatingFiredPanes() - .withAllowedLateness(allowedLateness)) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")); - } - } - // [END DocInclude_ProcTimeTrigger] -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java deleted file mode 100644 index c0a7bc8e17..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/StatefulTeamScore.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import static com.google.common.base.MoreObjects.firstNonNull; - -import com.google.common.annotations.VisibleForTesting; -import java.util.HashMap; -import java.util.Map; -import ${package}.common.ExampleUtils; -import ${package}.complete.game.utils.GameConstants; -import ${package}.complete.game.utils.WriteToBigQuery.FieldInfo; -import ${package}.complete.game.utils.WriteWindowedToBigQuery; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.state.StateSpec; -import org.apache.beam.sdk.state.StateSpecs; -import org.apache.beam.sdk.state.ValueState; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.Instant; - -/** - * This class is part of a series of pipelines that tell a story in a gaming domain. Concepts - * include: stateful processing. - * - *

This pipeline processes an unbounded stream of 'game events'. It uses stateful processing to - * aggregate team scores per team and outputs team name and it's total score every time the team - * passes a new multiple of a threshold score. For example, multiples of the threshold could be the - * corresponding scores required to pass each level of the game. By default, this threshold is set - * to 5000. - * - *

Stateful processing allows us to write pipelines that output based on a runtime state (when - * a team reaches a certain score, in every 100 game events etc) without time triggers. See - * https://beam.apache.org/blog/2017/02/13/stateful-processing.html for more information on using - * stateful processing. - * - *

Run {@code injector.Injector} to generate pubsub data for this pipeline. The Injector - * documentation provides more detail on how to do this. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * 
- * - *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should be - * the same topic to which the Injector is publishing. - */ -public class StatefulTeamScore extends LeaderBoard { - - /** - * Options supported by {@link StatefulTeamScore}. - */ - interface Options extends LeaderBoard.Options { - - @Description("Numeric value, multiple of which is used as threshold for outputting team score.") - @Default.Integer(5000) - Integer getThresholdScore(); - - void setThresholdScore(Integer value); - } - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write team score sums. - */ - private static Map>> configureCompleteWindowedTableWrite() { - - Map>> tableConfigure = - new HashMap<>(); - tableConfigure.put( - "team", new WriteWindowedToBigQuery.FieldInfo<>("STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteWindowedToBigQuery.FieldInfo<>("INTEGER", (c, w) -> c.element().getValue())); - tableConfigure.put( - "processing_time", - new WriteWindowedToBigQuery.FieldInfo<>( - "STRING", (c, w) -> GameConstants.DATE_TIME_FORMATTER.print(Instant.now()))); - return tableConfigure; - } - - - public static void main(String[] args) throws Exception { - - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - // Enforce that this pipeline is always run in streaming mode. - options.setStreaming(true); - ExampleUtils exampleUtils = new ExampleUtils(options); - Pipeline pipeline = Pipeline.create(options); - - pipeline - // Read game events from Pub/Sub using custom timestamps, which are extracted from the - // pubsub data elements, and parse the data. - .apply( - PubsubIO.readStrings() - .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) - .fromTopic(options.getTopic())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - // Create mapping. UpdateTeamScore uses team name as key. 
- .apply( - "MapTeamAsKey", - MapElements.into( - TypeDescriptors.kvs( - TypeDescriptors.strings(), TypeDescriptor.of(GameActionInfo.class))) - .via((GameActionInfo gInfo) -> KV.of(gInfo.team, gInfo))) - // Outputs a team's score every time it passes a new multiple of the threshold. - .apply("UpdateTeamScore", ParDo.of(new UpdateTeamScoreFn(options.getThresholdScore()))) - // Write the results to BigQuery. - .apply( - "WriteTeamLeaders", - new WriteWindowedToBigQuery<>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getLeaderBoardTableName() + "_team_leader", - configureCompleteWindowedTableWrite())); - - // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the - // command line. - PipelineResult result = pipeline.run(); - exampleUtils.waitToFinish(result); - } - - /** - * Tracks each team's score separately in a single state cell and outputs the score every time it - * passes a new multiple of a threshold. - * - *

We use stateful {@link DoFn} because: - *

    - *
  • State is key-partitioned. Therefore, the score is calculated per team.
  • - *
  • Stateful {@link DoFn} can determine when to output based on the state. This only allows - * outputting when a team's score passes a given threshold.
  • - *
- */ - @VisibleForTesting - public static class UpdateTeamScoreFn - extends DoFn, KV> { - - private static final String TOTAL_SCORE = "totalScore"; - private final int thresholdScore; - - public UpdateTeamScoreFn(int thresholdScore) { - this.thresholdScore = thresholdScore; - } - - /** - * Describes the state for storing team score. Let's break down this statement. - * - * {@link StateSpec} configures the state cell, which is provided by a runner during pipeline - * execution. - * - * {@link org.apache.beam.sdk.transforms.DoFn.StateId} annotation assigns an identifier to the - * state, which is used to refer the state in - * {@link org.apache.beam.sdk.transforms.DoFn.ProcessElement}. - * - *

A {@link ValueState} stores single value per key and per window. Because our pipeline is - * globally windowed in this example, this {@link ValueState} is just key partitioned, with one - * score per team. Any other class that extends {@link org.apache.beam.sdk.state.State} can be - * used.

- * - *

In order to store the value, the state must be encoded. Therefore, we provide a coder, in - * this case the {@link VarIntCoder}. If the coder is not provided as in - * {@code StateSpecs.value()}, Beam's coder inference will try to provide a coder automatically. - *

- */ - @StateId(TOTAL_SCORE) - private final StateSpec> totalScoreSpec = - StateSpecs.value(VarIntCoder.of()); - - /** - * To use a state cell, annotate a parameter with - * {@link org.apache.beam.sdk.transforms.DoFn.StateId} that matches the state declaration. The - * type of the parameter should match the {@link StateSpec} type. - */ - @ProcessElement - public void processElement( - ProcessContext c, - @StateId(TOTAL_SCORE) ValueState totalScore) { - String teamName = c.element().getKey(); - GameActionInfo gInfo = c.element().getValue(); - - // ValueState cells do not contain a default value. If the state is possibly not written, make - // sure to check for null on read. - int oldTotalScore = firstNonNull(totalScore.read(), 0); - totalScore.write(oldTotalScore + gInfo.score); - - // Since there are no negative scores, the easiest way to check whether a team just passed a - // new multiple of the threshold score is to compare the quotients of dividing total scores by - // threshold before and after this aggregation. For example, if the total score was 1999, - // the new total is 2002, and the threshold is 1000, 1999 / 1000 = 1, 2002 / 1000 = 2. - // Therefore, this team passed the threshold. - if (oldTotalScore / this.thresholdScore < totalScore.read() / this.thresholdScore) { - c.output(KV.of(teamName, totalScore.read())); - } - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java deleted file mode 100644 index 3459d043f5..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import org.apache.avro.reflect.Nullable; -import ${package}.complete.game.utils.WriteToText; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.coders.DefaultCoder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is the first in a series of four pipelines that tell a story in a 'gaming' domain. 
- * Concepts: batch processing, reading input from text files, writing output to - * text files, using standalone DoFns, use of the sum per key transform, and use of - * Java 8 lambda syntax. - * - *

In this gaming scenario, many users play, as members of different teams, over the course of a - * day, and their actions are logged for processing. Some of the logged game events may be late- - * arriving, if users play on mobile devices and go transiently offline for a period. - * - *

This pipeline does batch processing of data collected from gaming events. It calculates the - * sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The - * batch processing will not include any late data that arrives after the day's cutoff point. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --tempLocation=YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --output=YOUR_OUTPUT_DIRECTORY
- *   (possibly options specific to your runner or permissions for your temp/output locations)
- * }
- * 
- * - *

Optionally include the --input argument to specify a batch input file. - * See the --input default value for example batch data file, or use {@code injector.Injector} to - * generate your own batch data. - */ -public class UserScore { - - /** - * Class to hold info about a game event. - */ - @DefaultCoder(AvroCoder.class) - static class GameActionInfo { - @Nullable String user; - @Nullable String team; - @Nullable Integer score; - @Nullable Long timestamp; - - public GameActionInfo() {} - - public GameActionInfo(String user, String team, Integer score, Long timestamp) { - this.user = user; - this.team = team; - this.score = score; - this.timestamp = timestamp; - } - - public String getUser() { - return this.user; - } - public String getTeam() { - return this.team; - } - public Integer getScore() { - return this.score; - } - public String getKey(String keyname) { - if ("team".equals(keyname)) { - return this.team; - } else { // return username as default - return this.user; - } - } - public Long getTimestamp() { - return this.timestamp; - } - } - - - /** - * Parses the raw game event info into GameActionInfo objects. Each event line has the following - * format: username,teamname,score,timestamp_in_ms,readable_time - * e.g.: - * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 - * The human-readable time string is not used here. - */ - static class ParseEventFn extends DoFn { - - // Log and count parse errors. 
- private static final Logger LOG = LoggerFactory.getLogger(ParseEventFn.class); - private final Counter numParseErrors = Metrics.counter("main", "ParseErrors"); - - @ProcessElement - public void processElement(ProcessContext c) { - System.out.println("GOT " + c.element()); - String[] components = c.element().split(",", -1); - try { - String user = components[0].trim(); - String team = components[1].trim(); - Integer score = Integer.parseInt(components[2].trim()); - Long timestamp = Long.parseLong(components[3].trim()); - GameActionInfo gInfo = new GameActionInfo(user, team, score, timestamp); - c.output(gInfo); - } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) { - numParseErrors.inc(); - LOG.info("Parse error on " + c.element() + ", " + e.getMessage()); - } - } - } - - /** - * A transform to extract key/score information from GameActionInfo, and sum the scores. The - * constructor arg determines whether 'team' or 'user' info is extracted. - */ - // [START DocInclude_USExtractXform] - public static class ExtractAndSumScore - extends PTransform, PCollection>> { - - private final String field; - - ExtractAndSumScore(String field) { - this.field = field; - } - - @Override - public PCollection> expand( - PCollection gameInfo) { - - return gameInfo - .apply( - MapElements.into( - TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore()))) - .apply(Sum.integersPerKey()); - } - } - // [END DocInclude_USExtractXform] - - - /** - * Options supported by {@link UserScore}. - */ - public interface Options extends PipelineOptions { - - @Description("Path to the data file(s) containing game data.") - // The default maps to two large Google Cloud Storage files (each ~12GB) holding two subsequent - // day's worth (roughly) of data. 
- @Default.String("gs://apache-beam-samples/game/gaming_data*.csv") - String getInput(); - void setInput(String value); - - // Set this required option to specify where to write the output. - @Description("Path of the file to write to.") - @Validation.Required - String getOutput(); - void setOutput(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to text. This map - * is passed to the {@link WriteToText} constructor to write user score sums. - */ - protected static Map>> - configureOutput() { - Map>> config = new HashMap<>(); - config.put("user", (c, w) -> c.element().getKey()); - config.put("total_score", (c, w) -> c.element().getValue()); - return config; - } - - /** - * Run a batch pipeline. - */ - // [START DocInclude_USMain] - public static void main(String[] args) throws Exception { - // Begin constructing a pipeline configured by commandline flags. - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - Pipeline pipeline = Pipeline.create(options); - - // Read events from a text file and parse them. - pipeline - .apply(TextIO.read().from(options.getInput())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")) - .apply( - "WriteUserScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), false)); - - // Run the batch pipeline. 
- pipeline.run().waitUntilFinish(); - } - // [END DocInclude_USMain] -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java deleted file mode 100644 index c21ec2e319..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.injector; - -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.PublishRequest; -import com.google.api.services.pubsub.model.PubsubMessage; -import com.google.common.collect.ImmutableMap; -import java.io.BufferedOutputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; -import ${package}.complete.game.utils.GameConstants; - -/** - * This is a generator that simulates usage data from a mobile game, and either publishes the data - * to a pubsub topic or writes it to a file. - * - *

The general model used by the generator is the following. There is a set of teams with team - * members. Each member is scoring points for their team. After some period, a team will dissolve - * and a new one will be created in its place. There is also a set of 'Robots', or spammer users. - * They hop from team to team. The robots are set to have a higher 'click rate' (generate more - * events) than the regular team members. - * - *

Each generated line of data has the following form: - * username,teamname,score,timestamp_in_ms,readable_time - * e.g.: - * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 - * - *

The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if - * specified. It takes the following arguments: - * {@code Injector project-name (topic-name|none) (filename|none)}. - * - *

To run the Injector in the mode where it publishes to PubSub, you will need to authenticate - * locally using project-based service account credentials to avoid running over PubSub - * quota. - * See https://developers.google.com/identity/protocols/application-default-credentials - * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS - * environment variable to point to your downloaded service account credentials before starting the - * program, e.g.: - * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}. - * If you do not do this, then your injector will only run for a few minutes on your - * 'user account' credentials before you will start to see quota error messages like: - * "Request throttled due to user QPS limit being reached", and see this exception: - * ".com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests". - * Once you've set up your credentials, run the Injector like this": - *

{@code
- * Injector   none
- * }
- * 
- * The pubsub topic will be created if it does not exist. - * - *

To run the injector in write-to-file-mode, set the topic name to "none" and specify the - * filename: - *

{@code
- * Injector  none 
- * }
- * 
- */ -class Injector { - private static Pubsub pubsub; - private static Random random = new Random(); - private static String topic; - private static String project; - - // QPS ranges from 800 to 1000. - private static final int MIN_QPS = 800; - private static final int QPS_RANGE = 200; - // How long to sleep, in ms, between creation of the threads that make API requests to PubSub. - private static final int THREAD_SLEEP_MS = 500; - - // Lists used to generate random team names. - // If COLORS is changed, please also make changes in - // release/src/main/groovy/MobileGamingCommands.COLORS - private static final ArrayList COLORS = - new ArrayList<>( - Arrays.asList( - "Magenta", - "AliceBlue", - "Almond", - "Amaranth", - "Amber", - "Amethyst", - "AndroidGreen", - "AntiqueBrass", - "Fuchsia", - "Ruby", - "AppleGreen", - "Apricot", - "Aqua", - "ArmyGreen", - "Asparagus", - "Auburn", - "Azure", - "Banana", - "Beige", - "Bisque", - "BarnRed", - "BattleshipGrey")); - - private static final ArrayList ANIMALS = - new ArrayList<>( - Arrays.asList( - "Echidna", - "Koala", - "Wombat", - "Marmot", - "Quokka", - "Kangaroo", - "Dingo", - "Numbat", - "Emu", - "Wallaby", - "CaneToad", - "Bilby", - "Possum", - "Cassowary", - "Kookaburra", - "Platypus", - "Bandicoot", - "Cockatoo", - "Antechinus")); - - // The list of live teams. - private static ArrayList liveTeams = new ArrayList<>(); - - // The total number of robots in the system. - private static final int NUM_ROBOTS = 20; - // Determines the chance that a team will have a robot team member. 
- private static final int ROBOT_PROBABILITY = 3; - private static final int NUM_LIVE_TEAMS = 15; - private static final int BASE_MEMBERS_PER_TEAM = 5; - private static final int MEMBERS_PER_TEAM = 15; - private static final int MAX_SCORE = 20; - private static final int LATE_DATA_RATE = 5 * 60 * 2; // Every 10 minutes - private static final int BASE_DELAY_IN_MILLIS = 5 * 60 * 1000; // 5-10 minute delay - private static final int FUZZY_DELAY_IN_MILLIS = 5 * 60 * 1000; - - // The minimum time a 'team' can live. - private static final int BASE_TEAM_EXPIRATION_TIME_IN_MINS = 20; - private static final int TEAM_EXPIRATION_TIME_IN_MINS = 20; - - - /** - * A class for holding team info: the name of the team, when it started, - * and the current team members. Teams may but need not include one robot team member. - */ - private static class TeamInfo { - String teamName; - long startTimeInMillis; - int expirationPeriod; - // The team might but need not include 1 robot. Will be non-null if so. - String robot; - int numMembers; - - private TeamInfo(String teamName, long startTimeInMillis, String robot) { - this.teamName = teamName; - this.startTimeInMillis = startTimeInMillis; - // How long until this team is dissolved. - this.expirationPeriod = random.nextInt(TEAM_EXPIRATION_TIME_IN_MINS) - + BASE_TEAM_EXPIRATION_TIME_IN_MINS; - this.robot = robot; - // Determine the number of team members. 
- numMembers = random.nextInt(MEMBERS_PER_TEAM) + BASE_MEMBERS_PER_TEAM; - } - - String getTeamName() { - return teamName; - } - String getRobot() { - return robot; - } - - long getStartTimeInMillis() { - return startTimeInMillis; - } - long getEndTimeInMillis() { - return startTimeInMillis + (expirationPeriod * 60L * 1000L); - } - String getRandomUser() { - int userNum = random.nextInt(numMembers); - return "user" + userNum + "_" + teamName; - } - - int numMembers() { - return numMembers; - } - - @Override - public String toString() { - return "(" + teamName + ", num members: " + numMembers() + ", starting at: " - + startTimeInMillis + ", expires in: " + expirationPeriod + ", robot: " + robot + ")"; - } - } - - /** Utility to grab a random element from an array of Strings. */ - private static String randomElement(ArrayList list) { - int index = random.nextInt(list.size()); - return list.get(index); - } - - /** - * Get and return a random team. If the selected team is too old w.r.t its expiration, remove - * it, replacing it with a new team. - */ - private static TeamInfo randomTeam(ArrayList list) { - int index = random.nextInt(list.size()); - TeamInfo team = list.get(index); - // If the selected team is expired, remove it and return a new team. - long currTime = System.currentTimeMillis(); - if ((team.getEndTimeInMillis() < currTime) || team.numMembers() == 0) { - System.out.println("\nteam " + team + " is too old; replacing."); - System.out.println("start time: " + team.getStartTimeInMillis() - + ", end time: " + team.getEndTimeInMillis() - + ", current time:" + currTime); - removeTeam(index); - // Add a new team in its stead. - return (addLiveTeam()); - } else { - return team; - } - } - - /** - * Create and add a team. Possibly add a robot to the team. - */ - private static synchronized TeamInfo addLiveTeam() { - String teamName = randomElement(COLORS) + randomElement(ANIMALS); - String robot = null; - // Decide if we want to add a robot to the team. 
- if (random.nextInt(ROBOT_PROBABILITY) == 0) { - robot = "Robot-" + random.nextInt(NUM_ROBOTS); - } - // Create the new team. - TeamInfo newTeam = new TeamInfo(teamName, System.currentTimeMillis(), robot); - liveTeams.add(newTeam); - System.out.println("[+" + newTeam + "]"); - return newTeam; - } - - /** - * Remove a specific team. - */ - private static synchronized void removeTeam(int teamIndex) { - TeamInfo removedTeam = liveTeams.remove(teamIndex); - System.out.println("[-" + removedTeam + "]"); - } - - /** Generate a user gaming event. */ - private static String generateEvent(Long currTime, int delayInMillis) { - TeamInfo team = randomTeam(liveTeams); - String teamName = team.getTeamName(); - String user; - final int parseErrorRate = 900000; - - String robot = team.getRobot(); - // If the team has an associated robot team member... - if (robot != null) { - // Then use that robot for the message with some probability. - // Set this probability to higher than that used to select any of the 'regular' team - // members, so that if there is a robot on the team, it has a higher click rate. - if (random.nextInt(team.numMembers() / 2) == 0) { - user = robot; - } else { - user = team.getRandomUser(); - } - } else { // No robot. - user = team.getRandomUser(); - } - String event = user + "," + teamName + "," + random.nextInt(MAX_SCORE); - // Randomly introduce occasional parse errors. - if (random.nextInt(parseErrorRate) == 0) { - System.out.println("Introducing a parse error."); - event = "THIS LINE REPRESENTS CORRUPT DATA AND WILL CAUSE A PARSE ERROR"; - } - return addTimeInfoToEvent(event, currTime, delayInMillis); - } - - /** - * Add time info to a generated gaming event. - */ - private static String addTimeInfoToEvent(String message, Long currTime, int delayInMillis) { - String eventTimeString = - Long.toString((currTime - delayInMillis) / 1000 * 1000); - // Add a (redundant) 'human-readable' date string to make the data semantics more clear. 
- String dateString = GameConstants.DATE_TIME_FORMATTER.print(currTime); - message = message + "," + eventTimeString + "," + dateString; - return message; - } - - /** - * Publish 'numMessages' arbitrary events from live users with the provided delay, to a - * PubSub topic. - */ - public static void publishData(int numMessages, int delayInMillis) - throws IOException { - List pubsubMessages = new ArrayList<>(); - - for (int i = 0; i < Math.max(1, numMessages); i++) { - Long currTime = System.currentTimeMillis(); - String message = generateEvent(currTime, delayInMillis); - PubsubMessage pubsubMessage = new PubsubMessage() - .encodeData(message.getBytes("UTF-8")); - pubsubMessage.setAttributes( - ImmutableMap.of(GameConstants.TIMESTAMP_ATTRIBUTE, - Long.toString((currTime - delayInMillis) / 1000 * 1000))); - if (delayInMillis != 0) { - System.out.println(pubsubMessage.getAttributes()); - System.out.println("late data for: " + message); - } - pubsubMessages.add(pubsubMessage); - } - - PublishRequest publishRequest = new PublishRequest(); - publishRequest.setMessages(pubsubMessages); - pubsub.projects().topics().publish(topic, publishRequest).execute(); - } - - /** - * Publish generated events to a file. 
- */ - public static void publishDataToFile(String fileName, int numMessages, int delayInMillis) - throws IOException { - PrintWriter out = new PrintWriter(new OutputStreamWriter( - new BufferedOutputStream(new FileOutputStream(fileName, true)), "UTF-8")); - - try { - for (int i = 0; i < Math.max(1, numMessages); i++) { - Long currTime = System.currentTimeMillis(); - String message = generateEvent(currTime, delayInMillis); - out.println(message); - } - } catch (Exception e) { - System.err.print("Error in writing generated events to file"); - e.printStackTrace(); - } finally { - out.flush(); - out.close(); - } - } - - - public static void main(String[] args) throws IOException, InterruptedException { - if (args.length < 3) { - System.out.println("Usage: Injector project-name (topic-name|none) (filename|none)"); - System.exit(1); - } - boolean writeToFile = false; - boolean writeToPubsub = true; - project = args[0]; - String topicName = args[1]; - String fileName = args[2]; - // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if - // specified; otherwise, it will try to write to a file. - if ("none".equalsIgnoreCase(topicName)) { - writeToFile = true; - writeToPubsub = false; - } - if (writeToPubsub) { - // Create the PubSub client. - pubsub = InjectorUtils.getClient(); - // Create the PubSub topic as necessary. - topic = InjectorUtils.getFullyQualifiedTopicName(project, topicName); - InjectorUtils.createTopic(pubsub, topic); - System.out.println("Injecting to topic: " + topic); - } else { - if ("none".equalsIgnoreCase(fileName)) { - System.out.println("Filename not specified."); - System.exit(1); - } - System.out.println("Writing to file: " + fileName); - } - System.out.println("Starting Injector"); - - // Start off with some random live teams. - while (liveTeams.size() < NUM_LIVE_TEAMS) { - addLiveTeam(); - } - - // Publish messages at a rate determined by the QPS and Thread sleep settings. 
- for (int i = 0; true; i++) { - if (Thread.activeCount() > 10) { - System.err.println("I'm falling behind!"); - } - - // Decide if this should be a batch of late data. - final int numMessages; - final int delayInMillis; - if (i % LATE_DATA_RATE == 0) { - // Insert delayed data for one user (one message only) - delayInMillis = BASE_DELAY_IN_MILLIS + random.nextInt(FUZZY_DELAY_IN_MILLIS); - numMessages = 1; - System.out.println("DELAY(" + delayInMillis + ", " + numMessages + ")"); - } else { - System.out.print("."); - delayInMillis = 0; - numMessages = MIN_QPS + random.nextInt(QPS_RANGE); - } - - if (writeToFile) { // Won't use threading for the file write. - publishDataToFile(fileName, numMessages, delayInMillis); - } else { // Write to PubSub. - // Start a thread to inject some data. - new Thread( - () -> { - try { - publishData(numMessages, delayInMillis); - } catch (IOException e) { - System.err.println(e); - } - }) - .start(); - } - - // Wait before creating another injector thread. - Thread.sleep(THREAD_SLEEP_MS); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java deleted file mode 100644 index 5a0cf0166e..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.injector; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.util.Utils; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.http.HttpStatusCodes; -import com.google.api.client.http.HttpTransport; -import com.google.api.client.json.JsonFactory; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.PubsubScopes; -import com.google.api.services.pubsub.model.Topic; -import java.io.IOException; - -class InjectorUtils { - - private static final String APP_NAME = "injector"; - - /** - * Builds a new Pubsub client and returns it. - */ - public static Pubsub getClient(final HttpTransport httpTransport, - final JsonFactory jsonFactory) - throws IOException { - checkNotNull(httpTransport); - checkNotNull(jsonFactory); - GoogleCredential credential = - GoogleCredential.getApplicationDefault(httpTransport, jsonFactory); - if (credential.createScopedRequired()) { - credential = credential.createScoped(PubsubScopes.all()); - } - if (credential.getClientAuthentication() != null) { - System.out.println("\n***Warning! 
You are not using service account credentials to " - + "authenticate.\nYou need to use service account credentials for this example," - + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run " - + "out of PubSub quota very quickly.\nSee " - + "https://developers.google.com/identity/protocols/application-default-credentials."); - System.exit(1); - } - HttpRequestInitializer initializer = - new RetryHttpInitializerWrapper(credential); - return new Pubsub.Builder(httpTransport, jsonFactory, initializer) - .setApplicationName(APP_NAME) - .build(); - } - - /** - * Builds a new Pubsub client with default HttpTransport and - * JsonFactory and returns it. - */ - public static Pubsub getClient() throws IOException { - return getClient(Utils.getDefaultTransport(), - Utils.getDefaultJsonFactory()); - } - - - /** - * Returns the fully qualified topic name for Pub/Sub. - */ - public static String getFullyQualifiedTopicName( - final String project, final String topic) { - return String.format("projects/%s/topics/%s", project, topic); - } - - /** - * Create a topic if it doesn't exist. 
- */ - public static void createTopic(Pubsub client, String fullTopicName) - throws IOException { - System.out.println("fullTopicName " + fullTopicName); - try { - client.projects().topics().get(fullTopicName).execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) { - Topic topic = client.projects().topics() - .create(fullTopicName, new Topic()) - .execute(); - System.out.printf("Topic %s was created.%n", topic.getName()); - } - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java deleted file mode 100644 index e90fbcc18e..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.injector; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.client.auth.oauth2.Credential; -import com.google.api.client.http.HttpBackOffIOExceptionHandler; -import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler; -import com.google.api.client.http.HttpRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.http.HttpUnsuccessfulResponseHandler; -import com.google.api.client.util.ExponentialBackOff; -import com.google.api.client.util.Sleeper; -import java.util.logging.Logger; - -/** - * RetryHttpInitializerWrapper will automatically retry upon RPC - * failures, preserving the auto-refresh behavior of the Google - * Credentials. - */ -public class RetryHttpInitializerWrapper implements HttpRequestInitializer { - - /** - * A private logger. - */ - private static final Logger LOG = - Logger.getLogger(RetryHttpInitializerWrapper.class.getName()); - - /** - * One minutes in miliseconds. - */ - private static final int ONEMINITUES = 60000; - - /** - * Intercepts the request for filling in the "Authorization" - * header field, as well as recovering from certain unsuccessful - * error codes wherein the Credential must refresh its token for a - * retry. - */ - private final Credential wrappedCredential; - - /** - * A sleeper; you can replace it with a mock in your test. - */ - private final Sleeper sleeper; - - /** - * A constructor. - * - * @param wrappedCredential Credential which will be wrapped and - * used for providing auth header. - */ - public RetryHttpInitializerWrapper(final Credential wrappedCredential) { - this(wrappedCredential, Sleeper.DEFAULT); - } - - /** - * A protected constructor only for testing. - * - * @param wrappedCredential Credential which will be wrapped and - * used for providing auth header. - * @param sleeper Sleeper for easy testing. 
- */ - RetryHttpInitializerWrapper( - final Credential wrappedCredential, final Sleeper sleeper) { - this.wrappedCredential = checkNotNull(wrappedCredential); - this.sleeper = sleeper; - } - - /** - * Initializes the given request. - */ - @Override - public final void initialize(final HttpRequest request) { - request.setReadTimeout(2 * ONEMINITUES); // 2 minutes read timeout - final HttpUnsuccessfulResponseHandler backoffHandler = - new HttpBackOffUnsuccessfulResponseHandler( - new ExponentialBackOff()) - .setSleeper(sleeper); - request.setInterceptor(wrappedCredential); - request.setUnsuccessfulResponseHandler( - (request1, response, supportsRetry) -> { - if (wrappedCredential.handleResponse(request1, response, supportsRetry)) { - // If credential decides it can handle it, the return code or message indicated - // something specific to authentication, and no backoff is desired. - return true; - } else if (backoffHandler.handleResponse(request1, response, supportsRetry)) { - // Otherwise, we defer to the judgement of our internal backoff handler. - LOG.info("Retrying " + request1.getUrl().toString()); - return true; - } else { - return false; - } - }); - request.setIOExceptionHandler( - new HttpBackOffIOExceptionHandler(new ExponentialBackOff()) - .setSleeper(sleeper)); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java deleted file mode 100644 index dc28ad72ea..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/GameConstants.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.utils; - -import java.util.TimeZone; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * Shared constants between game series classes. - */ -public class GameConstants { - - public static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; - - public static final DateTimeFormatter DATE_TIME_FORMATTER = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java deleted file mode 100644 index d35a4ffcfc..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.utils; - -import com.google.api.services.bigquery.model.TableFieldSchema; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableRow; -import com.google.api.services.bigquery.model.TableSchema; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; - -/** - * Generate, format, and write BigQuery table row information. Use provided information about - * the field names and types, as well as lambda functions that describe how to generate their - * values. 
- */ -public class WriteToBigQuery - extends PTransform, PDone> { - - protected String projectId; - protected String datasetId; - protected String tableName; - protected Map> fieldInfo; - - public WriteToBigQuery() { - } - - public WriteToBigQuery( - String projectId, - String datasetId, - String tableName, - Map> fieldInfo) { - this.projectId = projectId; - this.datasetId = datasetId; - this.tableName = tableName; - this.fieldInfo = fieldInfo; - } - - /** - * A {@link Serializable} function from a {@link DoFn.ProcessContext} - * and {@link BoundedWindow} to the value for that field. - */ - public interface FieldFn extends Serializable { - Object apply(DoFn.ProcessContext context, BoundedWindow window); - } - - /** Define a class to hold information about output table field definitions. */ - public static class FieldInfo implements Serializable { - // The BigQuery 'type' of the field - private String fieldType; - // A lambda function to generate the field value - private FieldFn fieldFn; - - public FieldInfo(String fieldType, - FieldFn fieldFn) { - this.fieldType = fieldType; - this.fieldFn = fieldFn; - } - - String getFieldType() { - return this.fieldType; - } - - FieldFn getFieldFn() { - return this.fieldFn; - } - } - - /** Convert each key/score pair into a BigQuery TableRow as specified by fieldFn. */ - protected class BuildRowFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - - TableRow row = new TableRow(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - FieldFn fcn = fcnInfo.getFieldFn(); - row.set(key, fcn.apply(c, window)); - } - c.output(row); - } - } - - /** Build the output table schema. 
*/ - protected TableSchema getSchema() { - List fields = new ArrayList<>(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - String bqType = fcnInfo.getFieldType(); - fields.add(new TableFieldSchema().setName(key).setType(bqType)); - } - return new TableSchema().setFields(fields); - } - - @Override - public PDone expand(PCollection teamAndScore) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply( - BigQueryIO.writeTableRows() - .to(getTable(projectId, datasetId, tableName)) - .withSchema(getSchema()) - .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(WriteDisposition.WRITE_APPEND)); - return PDone.in(teamAndScore.getPipeline()); - } - - /** Utility to construct an output table reference. */ - static TableReference getTable(String projectId, String datasetId, String tableName) { - TableReference table = new TableReference(); - table.setDatasetId(datasetId); - table.setProjectId(projectId); - table.setTableId(tableName); - return table; - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java deleted file mode 100644 index 76fa3ff075..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.utils; - -import static com.google.common.base.Preconditions.checkArgument; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; -import java.util.stream.Collectors; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * Generate, format, and write rows. Use provided information about the field names and types, as - * well as lambda functions that describe how to generate their values. 
- */ -public class WriteToText - extends PTransform, PDone> { - - private static final DateTimeFormatter formatter = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles"))); - - protected String filenamePrefix; - protected Map> fieldFn; - protected boolean windowed; - - public WriteToText() { - } - - public WriteToText( - String filenamePrefix, - Map> fieldFn, - boolean windowed) { - this.filenamePrefix = filenamePrefix; - this.fieldFn = fieldFn; - this.windowed = windowed; - } - - /** - * A {@link Serializable} function from a {@link DoFn.ProcessContext} - * and {@link BoundedWindow} to the value for that field. - */ - public interface FieldFn extends Serializable { - Object apply(DoFn.ProcessContext context, BoundedWindow window); - } - - /** Convert each key/score pair into a row as specified by fieldFn. */ - protected class BuildRowFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - List fields = new ArrayList<>(); - for (Map.Entry> entry : fieldFn.entrySet()) { - String key = entry.getKey(); - FieldFn fcn = entry.getValue(); - fields.add(key + ": " + fcn.apply(c, window)); - } - String result = fields.stream().collect(Collectors.joining(", ")); - c.output(result); - } - } - - /** - * A {@link DoFn} that writes elements to files with names deterministically derived from the - * lower and upper bounds of their key (an {@link IntervalWindow}). - */ - protected static class WriteOneFilePerWindow extends PTransform, PDone> { - - private final String filenamePrefix; - - public WriteOneFilePerWindow(String filenamePrefix) { - this.filenamePrefix = filenamePrefix; - } - - @Override - public PDone expand(PCollection input) { - // Verify that the input has a compatible window type. 
- checkArgument( - input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder()); - - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - - return input.apply( - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites() - .withNumShards(3)); - } - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - protected static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId prefix; - - public PerWindowFiles(ResourceId prefix) { - this.prefix = prefix; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String filePrefix = prefix.isDirectory() ? "" : prefix.getFilename(); - return String.format( - "%s-%s-%s", filePrefix, formatter.print(window.start()), formatter.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return prefix.getCurrentDirectory().resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } - - @Override - public PDone expand(PCollection teamAndScore) { - if (windowed) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(new 
WriteToText.WriteOneFilePerWindow(filenamePrefix)); - } else { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(TextIO.write().to(filenamePrefix)); - } - return PDone.in(teamAndScore.getPipeline()); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java deleted file mode 100644 index 6aef88706d..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.utils; - -import com.google.api.services.bigquery.model.TableRow; -import java.util.Map; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; - -/** - * Generate, format, and write BigQuery table row information. Subclasses {@link WriteToBigQuery} - * to require windowing; so this subclass may be used for writes that require access to the - * context's window information. - */ -public class WriteWindowedToBigQuery - extends WriteToBigQuery { - - public WriteWindowedToBigQuery( - String projectId, String datasetId, String tableName, Map> fieldInfo) { - super(projectId, datasetId, tableName, fieldInfo); - } - - /** Convert each key/score pair into a BigQuery TableRow. 
*/ - protected class BuildRowFn extends DoFn { - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - - TableRow row = new TableRow(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - row.set(key, fcnInfo.getFieldFn().apply(c, window)); - } - c.output(row); - } - } - - @Override - public PDone expand(PCollection teamAndScore) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(BigQueryIO.writeTableRows() - .to(getTable(projectId, datasetId, tableName)) - .withSchema(getSchema()) - .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(WriteDisposition.WRITE_APPEND)); - return PDone.in(teamAndScore.getPipeline()); - } - -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java deleted file mode 100644 index 0fbee20cb5..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.io.Files; -import java.io.File; -import java.nio.charset.StandardCharsets; -import ${package}.DebuggingWordCount.WordCountOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DebuggingWordCount}. - */ -@RunWith(JUnit4.class) -public class DebuggingWordCountTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - private String getFilePath(String filePath) { - if (filePath.contains(":")) { - return filePath.replace("\\", "/").split(":", -1)[1]; - } - return filePath; - } - - @Test - public void testDebuggingWordCount() throws Exception { - File inputFile = tmpFolder.newFile(); - File outputFile = tmpFolder.newFile(); - Files.write( - "stomach secret Flourish message Flourish here Flourish", - inputFile, - StandardCharsets.UTF_8); - WordCountOptions options = - TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(getFilePath(inputFile.getAbsolutePath())); - options.setOutput(getFilePath(outputFile.getAbsolutePath())); - DebuggingWordCount.runDebuggingWordCount(options); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java deleted file mode 100644 index f4c8b160d7..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/MinimalWordCountTest.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.io.Serializable; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.StandardOpenOption; -import java.util.Arrays; -import org.apache.beam.sdk.extensions.gcp.options.GcsOptions; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.FlatMapElements; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.util.GcsUtil; -import org.apache.beam.sdk.util.gcsfs.GcsPath; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.mockito.Mockito; - -/** - * To keep {@link MinimalWordCount} simple, it is not factored or testable. This test - * file should be maintained with a copy of its code for a basic smoke test. 
- */ -@RunWith(JUnit4.class) -public class MinimalWordCountTest implements Serializable { - - @Rule - public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false); - - /** - * A basic smoke test that ensures there is no crash at pipeline construction time. - */ - @Test - public void testMinimalWordCount() throws Exception { - p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil()); - - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - .apply( - FlatMapElements.into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))) - .apply(Filter.by((String word) -> !word.isEmpty())) - .apply(Count.perElement()) - .apply( - MapElements.into(TypeDescriptors.strings()) - .via( - (KV wordCount) -> - wordCount.getKey() + ": " + wordCount.getValue())) - .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix")); - } - - private GcsUtil buildMockGcsUtil() throws IOException { - GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class); - - // Any request to open gets a new bogus channel - Mockito.when(mockGcsUtil.open(Mockito.any(GcsPath.class))) - .then( - invocation -> - FileChannel.open( - Files.createTempFile("channel-", ".tmp"), - StandardOpenOption.CREATE, - StandardOpenOption.DELETE_ON_CLOSE)); - - // Any request for expansion returns a list containing the original GcsPath - // This is required to pass validation that occurs in TextIO during apply() - Mockito.when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) - .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0])); - - return mockGcsUtil; - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java deleted file mode 100644 index 91a1bf8edc..0000000000 --- 
a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import ${package}.WordCount.CountWords; -import ${package}.WordCount.ExtractWordsFn; -import ${package}.WordCount.FormatAsTextFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of WordCount. - */ -@RunWith(JUnit4.class) -public class WordCountTest { - - /** Example test that tests a specific {@link DoFn}. 
*/ - @Test - public void testExtractWordsFn() throws Exception { - DoFnTester extractWordsFn = - DoFnTester.of(new ExtractWordsFn()); - - Assert.assertThat(extractWordsFn.processBundle(" some input words "), - CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), CoreMatchers.hasItems()); - Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), - CoreMatchers.hasItems("some", "input", "words")); - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */ - @Test - @Category(ValidatesRunner.class) - public void testCountWords() throws Exception { - PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of())); - - PCollection output = input.apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())); - - PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java deleted file mode 100644 index 5cbdc6244f..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.GameStats.CalculateSpammyUsers; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of GameStats. - * Because the pipeline was designed for easy readability and explanations, it lacks good - * modularity for testing. 
See our testing documentation for better ideas: - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ - */ -@RunWith(JUnit4.class) -public class GameStatsTest implements Serializable { - - // User scores - static final List> USER_SCORES = Arrays.asList( - KV.of("Robot-2", 66), KV.of("Robot-1", 116), KV.of("user7_AndroidGreenKookaburra", 23), - KV.of("user7_AndroidGreenKookaburra", 1), - KV.of("user19_BisqueBilby", 14), KV.of("user13_ApricotQuokka", 15), - KV.of("user18_BananaEmu", 25), KV.of("user6_AmberEchidna", 8), - KV.of("user2_AmberQuokka", 6), KV.of("user0_MagentaKangaroo", 4), - KV.of("user0_MagentaKangaroo", 3), KV.of("user2_AmberCockatoo", 13), - KV.of("user7_AlmondWallaby", 15), KV.of("user6_AmberNumbat", 11), - KV.of("user6_AmberQuokka", 4)); - - // The expected list of 'spammers'. - static final List> SPAMMERS = Arrays.asList( - KV.of("Robot-2", 66), KV.of("Robot-1", 116)); - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the calculation of 'spammy users'. */ - @Test - @Category(ValidatesRunner.class) - public void testCalculateSpammyUsers() throws Exception { - PCollection> input = p.apply(Create.of(USER_SCORES)); - PCollection> output = input.apply(new CalculateSpammyUsers()); - - // Check the set of spammers. 
- PAssert.that(output).containsInAnyOrder(SPAMMERS); - - p.run().waitUntilFinish(); - } - - @Test - public void testGameStatsOptions() { - PipelineOptionsFactory.as(GameStats.Options.class); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java deleted file mode 100644 index 17d459df93..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.UserScore.GameActionInfo; -import ${package}.complete.game.UserScore.ParseEventFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.Instant; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of HourlyTeamScore. - * Because the pipeline was designed for easy readability and explanations, it lacks good - * modularity for testing. 
See our testing documentation for better ideas: - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ - */ -@RunWith(JUnit4.class) -public class HourlyTeamScoreTest implements Serializable { - - static final String[] GAME_EVENTS_ARRAY = new String[] { - "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444", - "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444", - "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444", - "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444", - // time gap... - "user0_AndroidGreenEchidna,AndroidGreenEchidna,0,1447965690000,2015-11-19 12:41:31.053", - "user0_MagentaKangaroo,MagentaKangaroo,4,1447965690000,2015-11-19 12:41:31.053", - "user2_AmberCockatoo,AmberCockatoo,13,1447965690000,2015-11-19 12:41:31.053", - "user18_BananaEmu,BananaEmu,7,1447965690000,2015-11-19 12:41:31.053", - "user3_BananaEmu,BananaEmu,17,1447965690000,2015-11-19 12:41:31.053", - "user18_BananaEmu,BananaEmu,1,1447965690000,2015-11-19 12:41:31.053", - "user18_ApricotCaneToad,ApricotCaneToad,14,1447965690000,2015-11-19 12:41:31.053" - }; - - - static final List GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY); - - - // Used to check the filtering. 
- static final KV[] FILTERED_EVENTS = new KV[] { - KV.of("user0_AndroidGreenEchidna", 0), KV.of("user0_MagentaKangaroo", 4), - KV.of("user2_AmberCockatoo", 13), - KV.of("user18_BananaEmu", 7), KV.of("user3_BananaEmu", 17), - KV.of("user18_BananaEmu", 1), KV.of("user18_ApricotCaneToad", 14) - }; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the filtering. */ - @Test - @Category(ValidatesRunner.class) - public void testUserScoresFilter() throws Exception { - - final Instant startMinTimestamp = new Instant(1447965680000L); - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - - .apply("FilterStartTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) - // run a map to access the fields in the result. - .apply(MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - PAssert.that(output).containsInAnyOrder(FILTERED_EVENTS); - - p.run().waitUntilFinish(); - } - - @Test - public void testUserScoreOptions() { - PipelineOptionsFactory.as(HourlyTeamScore.Options.class); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java deleted file mode 100644 index 2478c07fa8..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ${package}.complete.game; - -import static org.hamcrest.Matchers.hasItem; -import static org.junit.Assert.assertThat; - -import com.google.common.collect.ImmutableMap; -import java.io.Serializable; -import ${package}.complete.game.LeaderBoard.CalculateTeamScores; -import ${package}.complete.game.LeaderBoard.CalculateUserScores; -import ${package}.complete.game.UserScore.GameActionInfo; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TimestampedValue; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link LeaderBoard}. 
- */ -@RunWith(JUnit4.class) -public class LeaderBoardTest implements Serializable { - private static final Duration ALLOWED_LATENESS = Duration.standardHours(1); - private static final Duration TEAM_WINDOW_DURATION = Duration.standardMinutes(20); - private Instant baseTime = new Instant(0); - - @Rule - public TestPipeline p = TestPipeline.create(); - /** - * Some example users, on two separate teams. - */ - private enum TestUser { - RED_ONE("scarlet", "red"), RED_TWO("burgundy", "red"), - BLUE_ONE("navy", "blue"), BLUE_TWO("sky", "blue"); - - private final String userName; - private final String teamName; - - TestUser(String userName, String teamName) { - this.userName = userName; - this.teamName = teamName; - } - - public String getUser() { - return userName; - } - - public String getTeam() { - return teamName; - } - } - - /** - * A test of the {@link CalculateTeamScores} {@link PTransform} when all of the elements arrive - * on time (ahead of the watermark). - */ - @Test - public void testTeamScoresOnTime() { - - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - // Start at the epoch - .advanceWatermarkTo(baseTime) - // add some elements ahead of the watermark - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 3, Duration.standardSeconds(22)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(3))) - // The watermark advances slightly, but not past the end of the window - .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3))) - // Add some more on time elements - .addElements(event(TestUser.RED_ONE, 1, Duration.standardMinutes(4)), - event(TestUser.BLUE_ONE, 2, Duration.standardSeconds(270))) - // The window should close and emit an ON_TIME pane - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - 
String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - PAssert.that(teamScores) - .inOnTimePane(new IntervalWindow(baseTime, TEAM_WINDOW_DURATION)) - .containsInAnyOrder(KV.of(blueTeam, 12), KV.of(redTeam, 4)); - - p.run().waitUntilFinish(); - } - - /** - * A test of the {@link CalculateTeamScores} {@link PTransform} when all of the elements arrive - * on time, and the processing time advances far enough for speculative panes. - */ - @Test - public void testTeamScoresSpeculative() { - - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - // Start at the epoch - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1))) - // Some time passes within the runner, which causes a speculative pane containing the blue - // team's score to be emitted - .advanceProcessingTime(Duration.standardMinutes(10)) - .addElements(event(TestUser.RED_TWO, 5, Duration.standardMinutes(3))) - // Some additional time passes and we get a speculative pane for the red team - .advanceProcessingTime(Duration.standardMinutes(12)) - .addElements(event(TestUser.BLUE_TWO, 3, Duration.standardSeconds(22))) - // More time passes and a speculative pane containing a refined value for the blue pane is - // emitted - .advanceProcessingTime(Duration.standardMinutes(10)) - // Some more events occur - .addElements(event(TestUser.RED_ONE, 4, Duration.standardMinutes(4)), - event(TestUser.BLUE_TWO, 2, Duration.standardMinutes(2))) - // The window closes and we get an ON_TIME pane that contains all of the updates - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - IntervalWindow window = new IntervalWindow(baseTime, 
TEAM_WINDOW_DURATION); - // The window contains speculative panes alongside the on-time pane - PAssert.that(teamScores) - .inWindow(window) - .containsInAnyOrder(KV.of(blueTeam, 10) /* The on-time blue pane */, - KV.of(redTeam, 9) /* The on-time red pane */, - KV.of(blueTeam, 5) /* The first blue speculative pane */, - KV.of(blueTeam, 8) /* The second blue speculative pane */, - KV.of(redTeam, 5) /* The red speculative pane */); - PAssert.that(teamScores) - .inOnTimePane(window) - .containsInAnyOrder(KV.of(blueTeam, 10), KV.of(redTeam, 9)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive behind the watermark (late data), but before the end of the - * window. These elements are emitted on time. - */ - @Test - public void testTeamScoresUnobservablyLate() { - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)), - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5))) - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION).minus(Duration.standardMinutes(1))) - // These events are late, but the window hasn't closed yet, so the elements are in the - // on-time pane - .addElements(event(TestUser.RED_TWO, 2, Duration.ZERO), - event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)), - event(TestUser.BLUE_TWO, 2, Duration.standardSeconds(90)), - event(TestUser.RED_TWO, 3, Duration.standardMinutes(3))) - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1))) - .advanceWatermarkToInfinity(); - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = 
TestUser.RED_ONE.getTeam(); - // The On Time pane contains the late elements that arrived before the end of the window - PAssert.that(teamScores) - .inOnTimePane(window) - .containsInAnyOrder(KV.of(redTeam, 14), KV.of(blueTeam, 13)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive behind the watermark (late data) after the watermark passes the - * end of the window, but before the maximum allowed lateness. These elements are emitted in a - * late pane. - */ - @Test - public void testTeamScoresObservablyLate() { - - Instant firstWindowCloses = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION); - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8))) - .advanceProcessingTime(Duration.standardMinutes(10)) - .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3))) - .addElements(event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)), - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5))) - .advanceWatermarkTo(firstWindowCloses.minus(Duration.standardMinutes(1))) - // These events are late but should still appear in a late pane - .addElements(event(TestUser.RED_TWO, 2, Duration.ZERO), - event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 3, Duration.standardMinutes(3))) - // A late refinement is emitted due to the advance in processing time, but the window has - // not yet closed because the watermark has not advanced - .advanceProcessingTime(Duration.standardMinutes(12)) - // These elements should appear in the final pane - .addElements(event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 1, Duration.standardMinutes(3))) - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - 
.apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - PAssert.that(teamScores) - .inWindow(window) - .satisfies( - input -> { - // The final sums need not exist in the same pane, but must appear in the output - // PCollection - assertThat(input, hasItem(KV.of(blueTeam, 11))); - assertThat(input, hasItem(KV.of(redTeam, 27))); - return null; - }); - PAssert.thatMap(teamScores) - // The closing behavior of CalculateTeamScores precludes an inFinalPane matcher - .inOnTimePane(window) - .isEqualTo(ImmutableMap.builder().put(redTeam, 7) - .put(blueTeam, 11) - .build()); - - // No final pane is emitted for the blue team, as all of their updates have been taken into - // account in earlier panes - PAssert.that(teamScores).inFinalPane(window).containsInAnyOrder(KV.of(redTeam, 27)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive beyond the maximum allowed lateness. These elements are dropped - * within {@link CalculateTeamScores} and do not impact the final result. 
- */ - @Test - public void testTeamScoresDroppablyLate() { - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - TestStream infos = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .addElements(event(TestUser.BLUE_ONE, 12, Duration.ZERO), - event(TestUser.RED_ONE, 3, Duration.ZERO)) - .advanceWatermarkTo(window.maxTimestamp()) - .addElements(event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_TWO, 3, Duration.ZERO), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(3))) - // Move the watermark to the end of the window to output on time - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION)) - // Move the watermark past the end of the allowed lateness plus the end of the window - .advanceWatermarkTo(baseTime.plus(ALLOWED_LATENESS) - .plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1))) - // These elements within the expired window are droppably late, and will not appear in the - // output - .addElements( - event(TestUser.BLUE_TWO, 3, TEAM_WINDOW_DURATION.minus(Duration.standardSeconds(5))), - event(TestUser.RED_ONE, 7, Duration.standardMinutes(4))) - .advanceWatermarkToInfinity(); - PCollection> teamScores = p.apply(infos) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - // Only one on-time pane and no late panes should be emitted - PAssert.that(teamScores) - .inWindow(window) - .containsInAnyOrder(KV.of(redTeam, 7), KV.of(blueTeam, 18)); - // No elements are added before the watermark passes the end of the window plus the allowed - // lateness, so no refinement should be emitted - PAssert.that(teamScores).inFinalPane(window).empty(); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive both on-time and late in {@link CalculateUserScores}, which emits - * output into the {@link GlobalWindow}. 
All elements that arrive should be taken into account, - * even if they arrive later than the maximum allowed lateness. - */ - @Test - public void testUserScore() { - - TestStream infos = - TestStream.create(AvroCoder.of(GameActionInfo.class)) - .addElements( - event(TestUser.BLUE_ONE, 12, Duration.ZERO), - event(TestUser.RED_ONE, 3, Duration.ZERO)) - .advanceProcessingTime(Duration.standardMinutes(7)) - .addElements( - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_TWO, 3, Duration.ZERO), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(3))) - .advanceProcessingTime(Duration.standardMinutes(5)) - .advanceWatermarkTo(baseTime.plus(ALLOWED_LATENESS).plus(Duration.standardHours(12))) - // Late elements are always observable within the global window - they arrive before - // the window closes, so they will appear in a pane, even if they arrive after the - // allowed lateness, and are taken into account alongside on-time elements - .addElements( - event(TestUser.RED_ONE, 3, Duration.standardMinutes(7)), - event(TestUser.RED_ONE, 2, (ALLOWED_LATENESS).plus(Duration.standardHours(13)))) - .advanceProcessingTime(Duration.standardMinutes(6)) - .addElements(event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(12))) - .advanceProcessingTime(Duration.standardMinutes(20)) - .advanceWatermarkToInfinity(); - - PCollection> userScores = - p.apply(infos).apply(new CalculateUserScores(ALLOWED_LATENESS)); - - // User scores are emitted in speculative panes in the Global Window - this matcher choice - // ensures that panes emitted by the watermark advancing to positive infinity are not included, - // as that will not occur outside of tests - PAssert.that(userScores) - .inEarlyGlobalWindowPanes() - .containsInAnyOrder(KV.of(TestUser.BLUE_ONE.getUser(), 15), - KV.of(TestUser.RED_ONE.getUser(), 7), - KV.of(TestUser.RED_ONE.getUser(), 12), - KV.of(TestUser.BLUE_TWO.getUser(), 3), - KV.of(TestUser.BLUE_TWO.getUser(), 8)); - - 
p.run().waitUntilFinish(); - } - - @Test - public void testLeaderBoardOptions() { - PipelineOptionsFactory.as(LeaderBoard.Options.class); - } - - private TimestampedValue event( - TestUser user, - int score, - Duration baseTimeOffset) { - return TimestampedValue.of(new GameActionInfo(user.getUser(), - user.getTeam(), - score, - baseTime.plus(baseTimeOffset).getMillis()), baseTime.plus(baseTimeOffset)); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java deleted file mode 100644 index c80c57f4fc..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/StatefulTeamScoreTest.java +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package ${package}.complete.game; - -import ${package}.complete.game.StatefulTeamScore.UpdateTeamScoreFn; -import ${package}.complete.game.UserScore.GameActionInfo; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TimestampedValue; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link StatefulTeamScore}. - */ -@RunWith(JUnit4.class) -public class StatefulTeamScoreTest { - - private Instant baseTime = new Instant(0); - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** - * Some example users, on two separate teams. - */ - private enum TestUser { - RED_ONE("scarlet", "red"), RED_TWO("burgundy", "red"), - BLUE_ONE("navy", "blue"), BLUE_TWO("sky", "blue"); - - private final String userName; - private final String teamName; - - TestUser(String userName, String teamName) { - this.userName = userName; - this.teamName = teamName; - } - - public String getUser() { - return userName; - } - - public String getTeam() { - return teamName; - } - } - - /** - * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs - * correctly for one team. 
- */ - @Test - public void testScoreUpdatesOneTeam() { - - TestStream> createEvents = TestStream.create(KvCoder.of( - StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) - .advanceWatermarkTo(baseTime) - .addElements( - event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)), - event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)), - event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)), - event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)), - event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)) - ) - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(ParDo.of(new UpdateTeamScoreFn(100))); - - String redTeam = TestUser.RED_ONE.getTeam(); - - PAssert.that(teamScores) - .inWindow(GlobalWindow.INSTANCE) - .containsInAnyOrder( - KV.of(redTeam, 100), - KV.of(redTeam, 200), - KV.of(redTeam, 401) - ); - - p.run().waitUntilFinish(); - } - - /** - * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs - * correctly for multiple teams. 
- */ - @Test - public void testScoreUpdatesPerTeam() { - - TestStream> createEvents = TestStream.create(KvCoder.of( - StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) - .advanceWatermarkTo(baseTime) - .addElements( - event(TestUser.RED_ONE, 50, Duration.standardSeconds(10)), - event(TestUser.RED_TWO, 50, Duration.standardSeconds(20)), - event(TestUser.BLUE_ONE, 70, Duration.standardSeconds(30)), - event(TestUser.BLUE_TWO, 80, Duration.standardSeconds(40)), - event(TestUser.BLUE_TWO, 50, Duration.standardSeconds(50)) - ) - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(ParDo.of(new UpdateTeamScoreFn(100))); - - String redTeam = TestUser.RED_ONE.getTeam(); - String blueTeam = TestUser.BLUE_ONE.getTeam(); - - PAssert.that(teamScores) - .inWindow(GlobalWindow.INSTANCE) - .containsInAnyOrder( - KV.of(redTeam, 100), - KV.of(blueTeam, 150), - KV.of(blueTeam, 200) - ); - - p.run().waitUntilFinish(); - } - - /** - * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs - * correctly per window and per key. 
- */ - @Test - public void testScoreUpdatesPerWindow() { - - TestStream> createEvents = TestStream.create(KvCoder.of( - StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class))) - .advanceWatermarkTo(baseTime) - .addElements( - event(TestUser.RED_ONE, 50, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 50, Duration.standardMinutes(2)), - event(TestUser.RED_ONE, 50, Duration.standardMinutes(3)), - event(TestUser.RED_ONE, 60, Duration.standardMinutes(6)), - event(TestUser.RED_TWO, 60, Duration.standardMinutes(7)) - ) - .advanceWatermarkToInfinity(); - - Duration teamWindowDuration = Duration.standardMinutes(5); - - PCollection> teamScores = p - .apply(createEvents) - .apply(Window.>into(FixedWindows.of(teamWindowDuration))) - .apply(ParDo.of(new UpdateTeamScoreFn(100))); - - String redTeam = TestUser.RED_ONE.getTeam(); - String blueTeam = TestUser.BLUE_ONE.getTeam(); - - IntervalWindow window1 = new IntervalWindow(baseTime, teamWindowDuration); - IntervalWindow window2 = new IntervalWindow(window1.end(), teamWindowDuration); - - PAssert.that(teamScores) - .inWindow(window1) - .containsInAnyOrder( - KV.of(redTeam, 100) - ); - - PAssert.that(teamScores) - .inWindow(window2) - .containsInAnyOrder( - KV.of(redTeam, 120) - ); - - p.run().waitUntilFinish(); - } - - private TimestampedValue> event( - TestUser user, - int score, - Duration baseTimeOffset) { - return TimestampedValue.of(KV.of(user.getTeam(), new GameActionInfo(user.getUser(), - user.getTeam(), - score, - baseTime.plus(baseTimeOffset).getMillis())), baseTime.plus(baseTimeOffset)); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java deleted file mode 100644 index b691a0cbd5..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java +++ 
/dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.UserScore.ExtractAndSumScore; -import ${package}.complete.game.UserScore.GameActionInfo; -import ${package}.complete.game.UserScore.ParseEventFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of UserScore. 
- */ -@RunWith(JUnit4.class) -public class UserScoreTest implements Serializable { - - static final String[] GAME_EVENTS_ARRAY = new String[] { - "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444", - "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444", - "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444", - "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444", - "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444", - "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444" - }; - - static final String[] GAME_EVENTS_ARRAY2 = new String[] { - "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444", - "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444", - "user13_BisqueBilby,BisqueBilby,xxx,1447955630000,2015-11-19 09:53:53.444" - }; - - static final List GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY); - static final List GAME_EVENTS2 = Arrays.asList(GAME_EVENTS_ARRAY2); - - static final List> USER_SUMS = Arrays.asList( - KV.of("user0_MagentaKangaroo", 3), KV.of("user13_ApricotQuokka", 15), - KV.of("user6_AmberNumbat", 11), KV.of("user7_AlmondWallaby", 15), - KV.of("user7_AndroidGreenKookaburra", 23), - KV.of("user19_BisqueBilby", 14)); - - static final List> TEAM_SUMS = Arrays.asList( - KV.of("MagentaKangaroo", 3), KV.of("ApricotQuokka", 15), - KV.of("AmberNumbat", 11), KV.of("AlmondWallaby", 15), - KV.of("AndroidGreenKookaburra", 23), - KV.of("BisqueBilby", 14)); - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the {@link ParseEventFn} {@link org.apache.beam.sdk.transforms.DoFn}. 
*/ - @Test - public void testParseEventFn() throws Exception { - DoFnTester parseEventFn = - DoFnTester.of(new ParseEventFn()); - - List results = parseEventFn.processBundle(GAME_EVENTS_ARRAY); - Assert.assertEquals(8, results.size()); - Assert.assertEquals("user0_MagentaKangaroo", results.get(0).getUser()); - Assert.assertEquals("MagentaKangaroo", results.get(0).getTeam()); - Assert.assertEquals(Integer.valueOf(3), results.get(0).getScore()); - } - - /** Tests ExtractAndSumScore("user"). */ - @Test - @Category(ValidatesRunner.class) - public void testUserScoreSums() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply(ParDo.of(new ParseEventFn())) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")); - - // Check the user score sums. - PAssert.that(output).containsInAnyOrder(USER_SUMS); - - p.run().waitUntilFinish(); - } - - /** Tests ExtractAndSumScore("team"). */ - @Test - @Category(ValidatesRunner.class) - public void testTeamScoreSums() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply(ParDo.of(new ParseEventFn())) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")); - - // Check the team score sums. - PAssert.that(output).containsInAnyOrder(TEAM_SUMS); - - p.run().waitUntilFinish(); - } - - /** Test that bad input data is dropped appropriately. 
*/ - @Test - @Category(ValidatesRunner.class) - public void testUserScoresBadInput() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of())); - - PCollection> extract = input - .apply(ParDo.of(new ParseEventFn())) - .apply( - MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - PAssert.that(extract).empty(); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index b0195b3f16..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.8 diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml deleted file mode 100644 index f995770ea6..0000000000 --- a/maven-archetypes/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-parent - pom - - Google Cloud Dataflow SDK for Java - Maven Archetypes - - - starter - examples - - - - - - - src/main/resources - true - - archetype-resources/pom.xml - - - - - src/main/resources - false - - archetype-resources/pom.xml - - - - - - - - org.apache.maven.plugins - maven-resources-plugin - - - @ - - false - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - default-jar - none - - - default-test-jar - none - - - - - - diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml deleted file mode 100644 index 643cfa4096..0000000000 --- a/maven-archetypes/starter/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.6.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-starter - Google Cloud Dataflow SDK for Java - Starter Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a simple starter pipeline to get started - using the Google Cloud Dataflow SDK for Java. 
- - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - ${archetype-packaging.version} - - - - - - - src/test/resources - true - - - - - - - maven-archetype-plugin - ${maven-archetype-plugin.version} - - - org.apache.maven.shared - maven-invoker - ${maven-invoker.version} - - - - - - default-integration-test - install - - integration-test - - - - true - - - - - - - - - diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 428c74aa4a..0000000000 --- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - 1.8 - - - - - - src/main/java - - **/*.java - - - - diff --git a/maven-archetypes/starter/src/main/resources/NOTICE b/maven-archetypes/starter/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/starter/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index da443b16fa..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - - UTF-8 - @maven-compiler-plugin.version@ - @exec-maven-plugin.version@ - @slf4j.version@ - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - ${targetPlatform} - ${targetPlatform} - - - - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-maven-plugin.version} - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-jdk14 - ${slf4j.version} - - - diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java deleted file mode 100644 index d6afdecf11..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index b0195b3f16..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.8 diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml deleted file mode 100644 index daf87595b7..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - 4.0.0 - - archetype.it - basic - 0.1 - - - UTF-8 - @maven-compiler-plugin.version@ - @exec-maven-plugin.version@ - @slf4j.version@ - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 1.8 - 1.8 - - - - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-maven-plugin.version} - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - org.slf4j - slf4j-jdk14 - ${slf4j.version} - - - diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java deleted file mode 100644 index 4ae92e8ce6..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.pkg; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/pom.xml b/pom.xml deleted file mode 100644 index 2924ff9fa2..0000000000 --- a/pom.xml +++ /dev/null @@ -1,449 +0,0 @@ - - - - 4.0.0 - - - com.google - google - 5 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - Google Cloud Dataflow SDK for Java - Parent - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes the parent POM for other Dataflow SDK - artifacts. - http://cloud.google.com/dataflow - 2013 - - 2.6.0-SNAPSHOT - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - - - Google Inc. 
- http://www.google.com - - - - - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD - - - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - - - - apache.staging - Apache Software Foundation Staging Repository - https://repository.apache.org/content/repositories/staging/ - - true - - - false - - - - - apache.snapshots - Apache Software Foundation Snapshot Repository - https://repository.apache.org/content/repositories/snapshots/ - - false - - - true - - - - - - 3.2 - - - - 1.8 - - UTF-8 - ${maven.build.timestamp} - yyyy-MM-dd HH:mm - - 2.5.0 - - Google Cloud Dataflow SDK for Java - beam-${beam.version} - 6 - 1 - - v2-rev374-1.23.0 - 8.7 - 1.0.0 - 1.23.0 - 20.0 - 1.3 - 2.4 - 4.12 - 1.0.0 - 1.9.5 - v1-rev382-1.23.0 - 1.7.25 - - 2.4 - 1.6.0 - 2.20.1 - 2.4 - 3.0.0 - 3.1.0 - 3.7.0 - 3.1.1 - 2.2 - 3.0.2 - 3.0.0-M1 - 2.5.3 - 3.1.0 - 3.1.0 - 2.21.0 - 3.0.1 - - - pom - - sdk - examples - maven-archetypes - - - - - - - org.apache.maven.plugins - maven-clean-plugin - ${maven-clean-plugin.version} - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - ${java.version} - ${java.version} - - -Xlint:all - -Werror - -Xlint:-options - - -Xlint:-processing - - true - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - ${maven-checkstyle-plugin.version} - - - com.puppycrawl.tools - checkstyle - ${checkstyle.version} - - - org.apache.beam - beam-sdks-java-build-tools - ${beam.version} - - - - beam/checkstyle.xml - sdk/suppressions.xml - true - true - false - true - - - - - test-compile - - check - - - - - - - org.apache.maven.plugins - maven-jar-plugin - ${maven-jar-plugin.version} - - true - - - - default-jar - - jar - - - - default-test-jar - - test-jar - - - - - - - org.apache.maven.plugins - 
maven-source-plugin - ${maven-source-plugin.version} - - - attach-sources - compile - - jar - - - - attach-test-sources - test-compile - - test-jar - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - false - - - - javadoc - package - - jar - - - - - - - org.apache.maven.plugins - maven-resources-plugin - ${maven-resources-plugin.version} - - - - org.apache.maven.plugins - maven-dependency-plugin - ${maven-dependency-plugin.version} - - - - analyze-only - - - true - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${maven-surefire-plugin.version} - - - - org.apache.maven.plugins - maven-archetype-plugin - ${maven-archetype-plugin.version} - - - org.apache.maven.shared - maven-invoker - ${maven-invoker.version} - - - - - - default-integration-test - install - - integration-test - - - true - - - - - - - org.apache.maven.plugins - maven-release-plugin - ${maven-release-plugin} - - true - true - deploy - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-maven-plugin.version} - - false - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - org.apache.maven.plugins - maven-source-plugin - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - - org.apache.maven.plugins - maven-dependency-plugin - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - ${project.version} - - - - org.apache.beam - beam-sdks-java-core - ${beam.version} - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - ${beam.version} - - - - org.apache.beam - beam-runners-direct-java - ${beam.version} - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - ${beam.version} - - - - org.apache.beam - beam-examples-java - ${beam.version} - - - - org.apache.beam - beam-sdks-java-io-kafka - ${beam.version} - - - - junit - junit - ${junit.version} - test - - - 
- diff --git a/sdk/pom.xml b/sdk/pom.xml deleted file mode 100644 index 0bd69dc58c..0000000000 --- a/sdk/pom.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.6.0-SNAPSHOT - - - google-cloud-dataflow-java-sdk-all - Google Cloud Dataflow SDK for Java - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes entire Dataflow Java SDK. - - jar - - - - - src/main/resources - true - - - - - - - org.apache.beam - beam-sdks-java-core - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - - - - org.apache.beam - beam-runners-direct-java - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - - - - org.apache.beam - beam-sdks-java-io-kafka - - - - junit - junit - test - - - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java deleted file mode 100644 index df3fd76ae6..0000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.runners.dataflow.DataflowRunner; -import org.apache.beam.runners.direct.DirectRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.kafka.KafkaIO; - -/** - * Mark the dependencies as used at compile time. - */ -class SdkDependencies { - private Pipeline p; - private BigQueryIO bigQueryIO; - private KafkaIO kafkaIO; - private DirectRunner directRunner; - private DataflowRunner dataflowRunner; -} diff --git a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties b/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties deleted file mode 100644 index 33ee76287a..0000000000 --- a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -name=${dataflow.release_name} -version=${pom.version} -build.date=${timestamp} -legacy.environment.major.version=${dataflow.legacy_environment_major_version} -fnapi.environment.major.version=${dataflow.fnapi_environment_major_version} -container.version=${dataflow.container_version} diff --git a/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java b/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java deleted file mode 100644 index 5088a00cfc..0000000000 --- a/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.beam.runners.dataflow; - -import static org.junit.Assert.assertEquals; - -import java.io.InputStream; -import java.util.Properties; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DataflowRunnerInfo} specifically validating that properties in - * this distrbution are correctly read. 
- */ -@RunWith(JUnit4.class) -public class DataflowRunnerInfoOverrideTest { - private static final String DATAFLOW_DISTRIBUTION_PROPERTIES_PATH = - "/org/apache/beam/runners/dataflow/dataflow-distribution.properties"; - - private static final String FNAPI_ENVIRONMENT_MAJOR_VERSION_KEY = - "fnapi.environment.major.version"; - private static final String LEGACY_ENVIRONMENT_MAJOR_VERSION_KEY = - "legacy.environment.major.version"; - private static final String CONTAINER_VERSION_KEY = "container.version"; - - - @Test - public void testDataflowDistributionOverride() throws Exception { - try (InputStream in = - DataflowRunnerInfo.class.getResourceAsStream(DATAFLOW_DISTRIBUTION_PROPERTIES_PATH)) { - Properties properties = new Properties(); - properties.load(in); - - assertEquals(properties.getProperty(FNAPI_ENVIRONMENT_MAJOR_VERSION_KEY), - DataflowRunnerInfo.getDataflowRunnerInfo().getFnApiEnvironmentMajorVersion()); - assertEquals(properties.getProperty(LEGACY_ENVIRONMENT_MAJOR_VERSION_KEY), - DataflowRunnerInfo.getDataflowRunnerInfo().getLegacyEnvironmentMajorVersion()); - assertEquals(properties.getProperty(CONTAINER_VERSION_KEY), - DataflowRunnerInfo.getDataflowRunnerInfo().getContainerVersion()); - } - } -} diff --git a/sdk/suppressions.xml b/sdk/suppressions.xml deleted file mode 100644 index 4d707ab291..0000000000 --- a/sdk/suppressions.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - From 9f055d9b6e15f512491e6e8310abb31d73fc4cb0 Mon Sep 17 00:00:00 2001 From: Ahmet Altay Date: Wed, 25 Jul 2018 10:42:36 -0700 Subject: [PATCH 25/25] Clean up README --- README.md | 84 +++++++++---------------------------------------------- 1 file changed, 13 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index 112df59d01..dfb630ad79 100644 --- a/README.md +++ b/README.md @@ -16,86 +16,28 @@ # Google Cloud Dataflow SDK for Java -[Google Cloud Dataflow](https://cloud.google.com/dataflow/) provides a simple, -powerful programming model for building 
both batch and streaming parallel data -processing pipelines. +[Google Cloud Dataflow](https://cloud.google.com/dataflow/) is a service for executing [Apache Beam](https://beam.apache.org) pipelines on Google Cloud Platform. -Dataflow SDK for Java is a distribution of a portion of the -[Apache Beam](https://beam.apache.org) project. This repository hosts the -code to build this distribution and any Dataflow-specific code/modules. The -underlying source code is hosted in the -[Apache Beam repository](https://github.com/apache/beam). - -[General usage](https://cloud.google.com/dataflow/getting-started) of Google -Cloud Dataflow does **not** require use of this repository. Instead, you can do -any one of the following: - -1. Depend directly on a specific -[version](https://cloud.google.com/dataflow/downloads) of the SDK in -the [Maven Central Repository](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22) -by adding the following dependency to development -environments like Eclipse or Apache Maven: - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - version_number - - -1. Download the example pipelines from the separate -[DataflowJavaSDK-examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK-examples) -repository. - -1. If you are using [Eclipse](https://eclipse.org/) integrated development -environment (IDE), the -[Cloud Dataflow Plugin for Eclipse](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-eclipse) -provides tools to create and execute Dataflow pipelines inside Eclipse. - -## Status [![Build Status](https://api.travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK.svg?branch=master)](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK) - -Both the SDK and the Dataflow Service are generally available and considered -stable and fully qualified for production use. 
- -This [`master`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/) branch -contains code to build Dataflow SDK 2.0.0 and newer, as a distribution of Apache -Beam. Pre-Beam SDKs, versions 1.x, are maintained in the -[`master-1.x`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master-1.x) -branch. - -## Overview - -The key concepts in this programming model are: - -* `PCollection`: represents a collection of data, which could be bounded or -unbounded in size. -* `PTransform`: represents a computation that transforms input PCollections -into output PCollections. -* `Pipeline`: manages a directed acyclic graph of PTransforms and PCollections -that is ready for execution. -* `PipelineRunner`: specifies where and how the pipeline should execute. - -We provide two runners: - - 1. The `DirectRunner` runs the pipeline on your local machine. - 1. The `DataflowRunner` submits the pipeline to the Cloud Dataflow Service, -where it runs using managed resources in the -[Google Cloud Platform](https://cloud.google.com). +## Getting Started -The SDK is built to be extensible and support additional execution environments -beyond local execution and the Google Cloud Dataflow Service. Apache Beam -contains additional SDKs, runners, and IO connectors. +* [Quickstart Using Java](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-maven) on Google Cloud Dataflow +* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/) +* [Java Examples](https://github.com/apache/beam/tree/master/examples/java) -## Getting Started +## We moved to Apache Beam! +Apache Beam Java SDK and the code development moved to the [Apache Beam repo](https://github.com/apache/beam/tree/master/sdks/java). -Please try our [Quickstarts](https://cloud.google.com/dataflow/docs/quickstarts). +If you want to contribute to the project (please do!) 
use this [Apache Beam contributor's guide](https://beam.apache.org/contribution-guide/). ## Contact Us -We welcome all usage-related questions on [Stack Overflow](http://stackoverflow.com/questions/tagged/google-cloud-dataflow) +We welcome all usage-related questions on +[Stack Overflow](https://stackoverflow.com/questions/tagged/google-cloud-dataflow) tagged with `google-cloud-dataflow`. -Please use [issue tracker](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/issues) -on GitHub to report any bugs, comments or questions regarding SDK development. +Please use the +[issue tracker](https://issues.apache.org/jira/browse/BEAM) +on Apache JIRA to report any bugs, comments or questions regarding SDK development. ## More Information