diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index c39158cf00..0000000000 --- a/.gitattributes +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# The default behavior, which overrides 'core.autocrlf', is to use Git's -# built-in heuristics to determine whether a particular file is text or binary. -# Text files are automatically normalized to the user's platforms. -* text=auto - -# Explicitly declare text files that should always be normalized and converted -# to native line endings. -.gitattributes text -.gitignore text -LICENSE text -*.avsc text -*.html text -*.java text -*.md text -*.properties text -*.proto text -*.py text -*.sh text -*.xml text -*.yml text - -# Declare files that will always have CRLF line endings on checkout. -# *.sln text eol=crlf - -# Explicitly denote all files that are truly binary and should not be modified. -# *.jpg binary diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 2a27023c28..0000000000 --- a/.gitignore +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -target/ - -# Ignore IntelliJ files. -.idea/ -*.iml -*.ipr -*.iws - -# Ignore Eclipse files. -.classpath -.project -.settings/ - -# The build process generates the dependency-reduced POM, but it shouldn't be -# committed. -dependency-reduced-pom.xml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8fa5d9a932..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -language: java - -sudo: false - -notifications: - email: - # Group email notifications are disabled for now, since we cannot do it on a per-branch basis. - # Right now, it would trigger a notification for each fork, which generates a lot of spam. - # recipients: - # - dataflow-sdk-build-notifications+travis@google.com - on_success: change - on_failure: always - -matrix: - include: - # On OSX, run with default JDK only. - - os: osx - # On Linux, run with specific JDKs only. - - os: linux - env: CUSTOM_JDK="oraclejdk8" - # The distribution does not build with Java 7 by design. 
We need to rewrite these tests - # to, for example, build and install with Java 8 and then test examples with Java 7. - # - os: linux - # env: CUSTOM_JDK="oraclejdk7" - # - os: linux - # env: CUSTOM_JDK="openjdk7" - -before_install: - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi - -install: - - travis_retry mvn install clean -U -DskipTests=true - -script: - # Verify that the project can be built and installed. - - mvn install - # Verify that starter and examples archetypes have the correct version of the NOTICE file. - - diff -q NOTICE maven-archetypes/starter/src/main/resources/NOTICE - - diff -q NOTICE maven-archetypes/examples/src/main/resources/NOTICE diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 9b616e5fe3..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,51 +0,0 @@ - - -Want to contribute? Great! First, read this page (including the small print at -the end). - -Google Cloud Dataflow SDK is a distribution of Apache Beam. If you'd like to -change anything under the `org.apache.beam.*` namespace, please submit that -change directly to the [Apache Beam](https://github.com/apache/beam) project. - -This repository contains code to build the Dataflow distribution of Beam, and -some Dataflow-specific code. Only changes to how the distribution is built, or -the Dataflow-specific code under the `com.google.cloud.dataflow.*` namespace, -can be merged here. - -### Before you contribute -Before we can use your code, you must sign the -[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) -(CLA), which you can do online. The CLA is necessary mainly because you own the -copyright to your changes, even after your contribution becomes part of our -codebase, so we need your permission to use and distribute your code. 
We also -need to be sure of various other things. For instance that you'll tell us if you -know that your code infringes on other people's patents. You don't have to sign -the CLA until after you've submitted your code for review and a member has -approved it, but you must do it before we can put your code into our codebase. - -Before you start working on a larger contribution, we recommend to get in touch -with us first through the issue tracker with your idea so that we can help out -and possibly guide you. Coordinating up front makes it much easier to avoid -frustration later on. - -### Code reviews -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. - -### The small print -Contributions made by corporations are covered by a different agreement than -the one above, the Software Grant and Corporate Contributor License Agreement. diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d645695673..0000000000 --- a/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/NOTICE b/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/README.md b/README.md index 112df59d01..dfb630ad79 100644 --- a/README.md +++ b/README.md @@ -16,86 +16,28 @@ # Google Cloud Dataflow SDK for Java -[Google Cloud Dataflow](https://cloud.google.com/dataflow/) provides a simple, -powerful programming model for building both batch and streaming parallel data -processing pipelines. +[Google Cloud Dataflow](https://cloud.google.com/dataflow/) is a service for executing [Apache Beam](https://beam.apache.org) pipelines on Google Cloud Platform. -Dataflow SDK for Java is a distribution of a portion of the -[Apache Beam](https://beam.apache.org) project. This repository hosts the -code to build this distribution and any Dataflow-specific code/modules. The -underlying source code is hosted in the -[Apache Beam repository](https://github.com/apache/beam). - -[General usage](https://cloud.google.com/dataflow/getting-started) of Google -Cloud Dataflow does **not** require use of this repository. Instead, you can do -any one of the following: - -1. 
Depend directly on a specific -[version](https://cloud.google.com/dataflow/downloads) of the SDK in -the [Maven Central Repository](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22) -by adding the following dependency to development -environments like Eclipse or Apache Maven: - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - version_number - - -1. Download the example pipelines from the separate -[DataflowJavaSDK-examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK-examples) -repository. - -1. If you are using [Eclipse](https://eclipse.org/) integrated development -environment (IDE), the -[Cloud Dataflow Plugin for Eclipse](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-eclipse) -provides tools to create and execute Dataflow pipelines inside Eclipse. - -## Status [![Build Status](https://api.travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK.svg?branch=master)](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK) - -Both the SDK and the Dataflow Service are generally available and considered -stable and fully qualified for production use. - -This [`master`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/) branch -contains code to build Dataflow SDK 2.0.0 and newer, as a distribution of Apache -Beam. Pre-Beam SDKs, versions 1.x, are maintained in the -[`master-1.x`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master-1.x) -branch. - -## Overview - -The key concepts in this programming model are: - -* `PCollection`: represents a collection of data, which could be bounded or -unbounded in size. -* `PTransform`: represents a computation that transforms input PCollections -into output PCollections. -* `Pipeline`: manages a directed acyclic graph of PTransforms and PCollections -that is ready for execution. -* `PipelineRunner`: specifies where and how the pipeline should execute. - -We provide two runners: - - 1. 
The `DirectRunner` runs the pipeline on your local machine. - 1. The `DataflowRunner` submits the pipeline to the Cloud Dataflow Service, -where it runs using managed resources in the -[Google Cloud Platform](https://cloud.google.com). +## Getting Started -The SDK is built to be extensible and support additional execution environments -beyond local execution and the Google Cloud Dataflow Service. Apache Beam -contains additional SDKs, runners, and IO connectors. +* [Quickstart Using Java](https://cloud.google.com/dataflow/docs/quickstarts/quickstart-java-maven) on Google Cloud Dataflow +* [Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/) +* [Java Examples](https://github.com/apache/beam/tree/master/examples/java) -## Getting Started +## We moved to Apache Beam! +The Apache Beam Java SDK and its development have moved to the [Apache Beam repo](https://github.com/apache/beam/tree/master/sdks/java). -Please try our [Quickstarts](https://cloud.google.com/dataflow/docs/quickstarts). +If you want to contribute to the project (please do!), use the [Apache Beam contributor's guide](https://beam.apache.org/contribution-guide/). ## Contact Us -We welcome all usage-related questions on [Stack Overflow](http://stackoverflow.com/questions/tagged/google-cloud-dataflow) +We welcome all usage-related questions on +[Stack Overflow](https://stackoverflow.com/questions/tagged/google-cloud-dataflow) tagged with `google-cloud-dataflow`. -Please use [issue tracker](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/issues) -on GitHub to report any bugs, comments or questions regarding SDK development. +Please use the +[issue tracker](https://issues.apache.org/jira/browse/BEAM) +on Apache JIRA to report any bugs, comments or questions regarding SDK development. 
## More Information diff --git a/examples/pom.xml b/examples/pom.xml deleted file mode 100644 index f87ae36b1d..0000000000 --- a/examples/pom.xml +++ /dev/null @@ -1,51 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT - - - google-cloud-dataflow-java-examples-all - Google Cloud Dataflow Java Examples - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes all Dataflow Java SDK - examples. - - jar - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - - - - org.apache.beam - beam-examples-java - - - - org.apache.beam - beam-examples-java8 - - - diff --git a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java b/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java deleted file mode 100644 index 827aff8395..0000000000 --- a/examples/src/main/java/com/google/cloud/dataflow/sdk/ExamplesDependencies.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.examples.MinimalWordCountJava8; -import org.apache.beam.examples.WordCount; - -/** - * Mark the examples dependencies as used at compile time. This is also needed - * to produce some content in the final JAR file. 
- */ -class ExamplesDependencies { - SdkDependencies sdkDependencies; - WordCount wordCount; - MinimalWordCountJava8 minimalWordCount; -} diff --git a/maven-archetypes/examples-java8/pom.xml b/maven-archetypes/examples-java8/pom.xml deleted file mode 100644 index 463c66f1d1..0000000000 --- a/maven-archetypes/examples-java8/pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-examples-java8 - Google Cloud Dataflow SDK for Java - Java 8 Examples Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a project containing all the example - pipelines targeting Java 8. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - - - - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - - - - - - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 326fdaa528..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - src/main/java - - **/*.java - - - - - src/test/java - - **/*.java - - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/NOTICE b/maven-archetypes/examples-java8/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. 
- -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index f33914d476..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,248 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - jar - - - UTF-8 - 2.20 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - 1.8 - 1.8 - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${surefire-plugin.version} - - all - 4 - true - - - - org.apache.maven.surefire - surefire-junit47 - ${surefire-plugin.version} - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.0.0 - - - package - - shade - - - ${project.artifactId}-bundled-${project.version} - - - *:* - - META-INF/LICENSE - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - com.google.api-client - google-api-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-bigquery - v2-rev295-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.http-client - google-http-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-pubsub - v1-rev10-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - joda-time - joda-time - 2.4 - - - - com.google.guava - guava - 20.0 - - - - - org.slf4j - slf4j-api - 1.7.14 - - - - 
org.slf4j - slf4j-jdk14 - 1.7.14 - - runtime - - - - - org.hamcrest - hamcrest-all - 1.3 - - - - junit - junit - 4.12 - - - - org.mockito - mockito-all - 1.9.5 - test - - - diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java deleted file mode 100644 index 07870f2ed0..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * An example that verifies word counts in Shakespeare and includes Beam best practices. - * - *

This class, {@link DebuggingWordCount}, is the third in a series of four successively more - * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount} - * and {@link WordCount}. After you've looked at this example, then see the - * {@link WindowedWordCount} pipeline, for introduction of additional concepts. - * - *

Basic concepts, also in the MinimalWordCount and WordCount examples: - * Reading text files; counting a PCollection; executing a Pipeline both locally - * and using a selected runner; defining DoFns. - * - *

New Concepts: - *

- *   1. Logging using SLF4J, even in a distributed environment
- *   2. Creating a custom metric (runners have varying levels of support)
- *   3. Testing your Pipeline via PAssert
- * 
- * - *

To execute this pipeline locally, specify general pipeline configuration: - *

{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * 
- * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - */ -public class DebuggingWordCount { - /** A DoFn that filters for a specific key based upon a regular expression. */ - public static class FilterTextFn extends DoFn, KV> { - /** - * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the - * logger. Depending on your SLF4J configuration, log statements will likely be qualified by - * this name. - * - *

Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J - * configuration that is most appropriate for their logging integration. - */ - private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class); - - private final Pattern filter; - public FilterTextFn(String pattern) { - filter = Pattern.compile(pattern); - } - - /** - * Concept #2: A custom metric can track values in your pipeline as it runs. Each - * runner provides varying levels of support for metrics, and may expose them - * in a dashboard, etc. - */ - private final Counter matchedWords = Metrics.counter(FilterTextFn.class, "matchedWords"); - private final Counter unmatchedWords = Metrics.counter(FilterTextFn.class, "unmatchedWords"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (filter.matcher(c.element().getKey()).matches()) { - // Log at the "DEBUG" level each element that we match. When executing this pipeline - // these log lines will appear only if the log level is set to "DEBUG" or lower. - LOG.debug("Matched: " + c.element().getKey()); - matchedWords.inc(); - c.output(c.element()); - } else { - // Log at the "TRACE" level each element that is not matched. Different log levels - // can be used to control the verbosity of logging providing an effective mechanism - // to filter less important information. - LOG.trace("Did not match: " + c.element().getKey()); - unmatchedWords.inc(); - } - } - } - - /** - * Options supported by {@link DebuggingWordCount}. - * - *

Inherits standard configuration options and all options defined in - * {@link WordCount.WordCountOptions}. - */ - public interface WordCountOptions extends WordCount.WordCountOptions { - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - PCollection> filteredWords = - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); - - /** - * Concept #3: PAssert is a set of convenient PTransforms in the style of - * Hamcrest's collection matchers that can be used when writing Pipeline level tests - * to validate the contents of PCollections. PAssert is best used in unit tests - * with small data sets but is demonstrated here as a teaching tool. - * - *

Below we verify that the set of filtered words matches our expected counts. Note - * that PAssert does not provide any output and that successful completion of the - * Pipeline implies that the expectations were met. Learn more at - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ on how to test - * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test. - */ - List> expectedResults = Arrays.asList( - KV.of("Flourish", 3L), - KV.of("stomach", 1L)); - PAssert.that(filteredWords).containsInAnyOrder(expectedResults); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java deleted file mode 100644 index d6b08066db..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; - - -/** - * An example that counts words in Shakespeare. - * - *

This class, {@link MinimalWordCount}, is the first in a series of four successively more - * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or - * argument processing, and focus on construction of the pipeline, which chains together the - * application of core transforms. - * - *

Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the - * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional - * concepts. - * - *

Concepts: - * - *

- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting items in a PCollection
- *   4. Writing data to text files
- * 
- * - *

No arguments are required to run this pipeline. It will be executed with the DirectRunner. You - * can see the results in the output files in your current working directory, with names like - * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate - * file service. - */ -public class MinimalWordCount { - - public static void main(String[] args) { - // Create a PipelineOptions object. This object lets us set various execution - // options for our pipeline, such as the runner you wish to use. This example - // will run with the DirectRunner by default, based on the class path configured - // in its dependencies. - PipelineOptions options = PipelineOptionsFactory.create(); - - // Create the Pipeline object with the options we defined above. - Pipeline p = Pipeline.create(options); - - // Apply the pipeline's transforms. - - // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set - // of input text files. TextIO.Read returns a PCollection where each element is one line from - // the input text (a set of Shakespeare's texts). - - // This example reads a public data set consisting of the complete works of Shakespeare. - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - - // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a - // DoFn (defined in-line) on each element that tokenizes the text line into individual words. - // The ParDo returns a PCollection, where each element is an individual word in - // Shakespeare's collected texts. - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split(ExampleUtils.TOKENIZER_PATTERN)) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - - // Concept #3: Apply the Count transform to our PCollection of individual words. 
The Count - // transform returns a new PCollection of key/value pairs, where each key represents a unique - // word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - - // Apply a MapElements transform that formats our PCollection of word counts into a printable - // string, suitable for writing to an output file. - .apply("FormatResults", MapElements.via(new SimpleFunction, String>() { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - })) - - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. - // - // By default, it will write to a set of files with names like wordcount-00001-of-00005 - .apply(TextIO.write().to("wordcounts")); - - // Run the pipeline. - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java deleted file mode 100644 index e635a885b7..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/MinimalWordCountJava8.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.FlatMapElements; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptors; - -/** - * An example that counts words in Shakespeare, using Java 8 language features. - * - *

See {@link MinimalWordCount} for a comprehensive explanation. - */ -public class MinimalWordCountJava8 { - - public static void main(String[] args) { - PipelineOptions options = PipelineOptionsFactory.create(); - // In order to run your pipeline, you need to make following runner specific changes: - // - // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner - // or FlinkRunner. - // CHANGE 2/3: Specify runner-required options. - // For BlockingDataflowRunner, set project and temp location as follows: - // DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); - // dataflowOptions.setRunner(BlockingDataflowRunner.class); - // dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE"); - // dataflowOptions.setTempLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY"); - // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions} - // for more details. - // options.as(FlinkPipelineOptions.class) - // .setRunner(FlinkRunner.class); - - Pipeline p = Pipeline.create(options); - - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - .apply(FlatMapElements - .into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^\\p{L}]+")))) - .apply(Filter.by((String word) -> !word.isEmpty())) - .apply(Count.perElement()) - .apply(MapElements - .into(TypeDescriptors.strings()) - .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) - // CHANGE 3/3: The Google Cloud Storage path is required for outputting the results to. 
- .apply(TextIO.write().to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX")); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java deleted file mode 100644 index 6a1d07c485..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.io.IOException; -import java.util.concurrent.ThreadLocalRandom; -import ${package}.common.ExampleBigQueryTableOptions; -import ${package}.common.ExampleOptions; -import ${package}.common.WriteOneFilePerWindow; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - - -/** - * An example that counts words in text, and can run over either unbounded or bounded input - * collections. - * - *

This class, {@link WindowedWordCount}, is the last in a series of four successively more - * detailed 'word count' examples. First take a look at {@link MinimalWordCount}, - * {@link WordCount}, and {@link DebuggingWordCount}. - * - *

Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples: - * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally - * and using a selected runner; defining DoFns; - * user-defined PTransforms; defining PipelineOptions. - * - *

New Concepts: - *

- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. Windowing
- *   4. Re-using PTransforms over windowed PCollections
- *   5. Accessing the window of an element
- *   6. Writing data to per-window text files
- * 
- * - *

By default, the examples will run with the {@code DirectRunner}. - * To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * See examples/java/README.md for instructions about how to configure different runners. - * - *

To execute this pipeline locally, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - *

By default, the pipeline will do fixed windowing, on 10-minute windows. You can - * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=15} - * for 15-minute windows. - * - *

The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C). - */ -public class WindowedWordCount { - static final int WINDOW_SIZE = 10; // Default window duration in minutes - /** - * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for - * this example, for the bounded data case. - * - *

Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate - * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a - * 2-hour period. - */ - static class AddTimestampFn extends DoFn { - private static final Duration RAND_RANGE = Duration.standardHours(1); - private final Instant minTimestamp; - private final Instant maxTimestamp; - - AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) { - this.minTimestamp = minTimestamp; - this.maxTimestamp = maxTimestamp; - } - - @ProcessElement - public void processElement(ProcessContext c) { - Instant randomTimestamp = - new Instant( - ThreadLocalRandom.current() - .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis())); - - /** - * Concept #2: Set the data element with that timestamp. - */ - c.outputWithTimestamp(c.element(), new Instant(randomTimestamp)); - } - } - - /** A {@link DefaultValueFactory} that returns the current system time. */ - public static class DefaultToCurrentSystemTime implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return System.currentTimeMillis(); - } - } - - /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */ - public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return options.as(Options.class).getMinTimestampMillis() - + Duration.standardHours(1).getMillis(); - } - } - - /** - * Options for {@link WindowedWordCount}. - * - *

Inherits standard example configuration options, which allow specification of the - * runner, as well as the {@link WordCount.WordCountOptions} support for - * specification of the input and output files. - */ - public interface Options extends WordCount.WordCountOptions, - ExampleOptions, ExampleBigQueryTableOptions { - @Description("Fixed window duration, in minutes") - @Default.Integer(WINDOW_SIZE) - Integer getWindowSize(); - void setWindowSize(Integer value); - - @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToCurrentSystemTime.class) - Long getMinTimestampMillis(); - void setMinTimestampMillis(Long value); - - @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class) - Long getMaxTimestampMillis(); - void setMaxTimestampMillis(Long value); - - @Description("Fixed number of shards to produce per window, or null for runner-chosen sharding") - Integer getNumShards(); - void setNumShards(Integer numShards); - } - - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - final String output = options.getOutput(); - final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); - final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); - - Pipeline pipeline = Pipeline.create(options); - - /** - * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or - * unbounded input source. - */ - PCollection input = pipeline - /** Read from the GCS file. */ - .apply(TextIO.read().from(options.getInputFile())) - // Concept #2: Add an element timestamp, using an artificial time just to show windowing. - // See AddTimestampFn for more detail on this. - .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp))); - - /** - * Concept #3: Window into fixed windows. 
The fixed window size for this example defaults to 1 - * minute (you can change this with a command-line option). See the documentation for more - * information on how fixed windows work, and for information on the other types of windowing - * available (e.g., sliding windows). - */ - PCollection windowedWords = - input.apply( - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); - - /** - * Concept #4: Re-use our existing CountWords transform that does not have knowledge of - * windows over a PCollection containing windowed values. - */ - PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords()); - - /** - * Concept #5: Format the results and write to a sharded file partitioned by window, using a - * simple ParDo operation. Because there may be failures followed by retries, the - * writes must be idempotent, but the details of writing to files is elided here. - */ - wordCounts - .apply(MapElements.via(new WordCount.FormatAsTextFn())) - .apply(new WriteOneFilePerWindow(output, options.getNumShards())); - - PipelineResult result = pipeline.run(); - try { - result.waitUntilFinish(); - } catch (Exception exc) { - result.cancel(); - } - } - -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java deleted file mode 100644 index 79b71403b9..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation.Required; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * An example that counts words in Shakespeare and includes Beam best practices. - * - *

This class, {@link WordCount}, is the second in a series of four successively more detailed - * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}. - * After you've looked at this example, then see the {@link DebuggingWordCount} - * pipeline, for introduction of additional concepts. - * - *

For a detailed walkthrough of this example, see - * - * https://beam.apache.org/get-started/wordcount-example/ - * - * - *

Basic concepts, also in the MinimalWordCount example: - * Reading text files; counting a PCollection; writing to text files - * - *

New Concepts: - *

- *   1. Executing a Pipeline both locally and using the selected runner
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * 
- * - *

Concept #1: you can execute this pipeline either locally or by selecting another runner. - * These are now command-line options and not hard-coded as they were in the MinimalWordCount - * example. - * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

To execute this pipeline, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - */ -public class WordCount { - - /** - * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns - * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it - * to a ParDo in the pipeline. - */ - static class ExtractWordsFn extends DoFn { - private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (c.element().trim().isEmpty()) { - emptyLines.inc(); - } - - // Split the line into words. - String[] words = c.element().split(ExampleUtils.TOKENIZER_PATTERN); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - /** A SimpleFunction that converts a Word and Count into a printable string. */ - public static class FormatAsTextFn extends SimpleFunction, String> { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - } - - /** - * A PTransform that converts a PCollection containing lines of text into a PCollection of - * formatted word counts. - * - *

Concept #3: This is a custom composite transform that bundles two transforms (ParDo and - * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse, - * modular testing, and an improved monitoring experience. - */ - public static class CountWords extends PTransform, - PCollection>> { - @Override - public PCollection> expand(PCollection lines) { - - // Convert lines of text into individual words. - PCollection words = lines.apply( - ParDo.of(new ExtractWordsFn())); - - // Count the number of times each word occurs. - PCollection> wordCounts = - words.apply(Count.perElement()); - - return wordCounts; - } - } - - /** - * Options supported by {@link WordCount}. - * - *

Concept #4: Defining your own configuration options. Here, you can add your own arguments - * to be processed by the command-line parser, and specify default values for them. You can then - * access the options values in your pipeline code. - * - *

Inherits standard configuration options. - */ - public interface WordCountOptions extends PipelineOptions { - - /** - * By default, this example reads from a public dataset containing the text of - * King Lear. Set this option to choose a different input file or glob. - */ - @Description("Path of the file to read from") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInputFile(); - void setInputFile(String value); - - /** - * Set this required option to specify where to write the output. - */ - @Description("Path of the file to write to") - @Required - String getOutput(); - void setOutput(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the - // static FormatAsTextFn() to the ParDo transform. - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())) - .apply("WriteCounts", TextIO.write().to(options.getOutput())); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java deleted file mode 100644 index 57f1546e27..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.services.bigquery.model.TableSchema; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure BigQuery tables in Beam examples. - * The project defaults to the project being used to run the example. - */ -public interface ExampleBigQueryTableOptions extends GcpOptions { - @Description("BigQuery dataset name") - @Default.String("beam_examples") - String getBigQueryDataset(); - void setBigQueryDataset(String dataset); - - @Description("BigQuery table name") - @Default.InstanceFactory(BigQueryTableFactory.class) - String getBigQueryTable(); - void setBigQueryTable(String table); - - @Description("BigQuery table schema") - TableSchema getBigQuerySchema(); - void setBigQuerySchema(TableSchema schema); - - /** - * Returns the job name as the default BigQuery table name. 
- */ - class BigQueryTableFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return options.getJobName().replace('-', '_'); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java deleted file mode 100644 index 90f935c3ce..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure the Beam examples. 
- */ -public interface ExampleOptions extends PipelineOptions { - @Description("Whether to keep jobs running after local process exit") - @Default.Boolean(false) - boolean getKeepJobsRunning(); - void setKeepJobsRunning(boolean keepJobsRunning); - - @Description("Number of workers to use when executing the injector pipeline") - @Default.Integer(1) - int getInjectorNumWorkers(); - void setInjectorNumWorkers(int numWorkers); -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java deleted file mode 100644 index cf142a10fd..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic/subscription in Beam examples. - */ -public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions { - @Description("Pub/Sub subscription") - @Default.InstanceFactory(PubsubSubscriptionFactory.class) - String getPubsubSubscription(); - void setPubsubSubscription(String subscription); - - /** - * Returns a default Pub/Sub subscription based on the project and the job names. - */ - class PubsubSubscriptionFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/subscriptions/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java deleted file mode 100644 index 86784b06da..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic in Beam examples. - */ -public interface ExamplePubsubTopicOptions extends GcpOptions { - @Description("Pub/Sub topic") - @Default.InstanceFactory(PubsubTopicFactory.class) - String getPubsubTopic(); - void setPubsubTopic(String topic); - - /** - * Returns a default Pub/Sub topic based on the project and the job names. - */ - class PubsubTopicFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/topics/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java deleted file mode 100644 index 78f3849b40..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.Bigquery.Datasets; -import com.google.api.services.bigquery.Bigquery.Tables; -import com.google.api.services.bigquery.model.Dataset; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.Subscription; -import com.google.api.services.pubsub.model.Topic; -import com.google.auth.Credentials; -import com.google.auth.http.HttpCredentialsAdapter; -import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; -import java.io.IOException; -import java.util.Collection; -import 
java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.RetryHttpRequestInitializer; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.util.Transport; -import org.joda.time.Duration; - -/** - * The utility class that sets up and tears down external resources, - * and cancels the streaming pipelines once the program terminates. - * - *

It is used to run Beam examples. - */ -public class ExampleUtils { - - private static final int SC_NOT_FOUND = 404; - - /** - * \p{L} denotes the category of Unicode letters, - * so this pattern will match on everything that is not a letter. - * - *

It is used for tokenizing strings in the wordcount examples. - */ - public static final String TOKENIZER_PATTERN = "[^\\p{L}]+"; - - private final PipelineOptions options; - private Bigquery bigQueryClient = null; - private Pubsub pubsubClient = null; - private Set pipelinesToCancel = Sets.newHashSet(); - private List pendingMessages = Lists.newArrayList(); - - /** - * Do resources and runner options setup. - */ - public ExampleUtils(PipelineOptions options) { - this.options = options; - } - - /** - * Sets up external resources that are required by the example, - * such as Pub/Sub topics and BigQuery tables. - * - * @throws IOException if there is a problem setting up the resources - */ - public void setup() throws IOException { - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backOff = - FluentBackoff.DEFAULT - .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff(); - Throwable lastException = null; - try { - do { - try { - setupPubsub(); - setupBigQueryTable(); - return; - } catch (GoogleJsonResponseException e) { - lastException = e; - } - } while (BackOffUtils.next(sleeper, backOff)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // Ignore InterruptedException - } - throw new RuntimeException(lastException); - } - - /** - * Sets up the Google Cloud Pub/Sub topic. - * - *

If the topic doesn't exist, a new topic with the given name will be created. - * - * @throws IOException if there is a problem setting up the Pub/Sub topic - */ - public void setupPubsub() throws IOException { - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - pendingMessages.add("**********************Set Up Pubsub************************"); - setupPubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been set up for this example: " - + pubsubOptions.getPubsubTopic()); - - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - setupPubsubSubscription( - pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been set up for this example: " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - /** - * Sets up the BigQuery table with the given schema. - * - *

If the table already exists, the schema has to match the given one. Otherwise, the example - * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema - * will be created. - * - * @throws IOException if there is a problem setting up the BigQuery table - */ - public void setupBigQueryTable() throws IOException { - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("******************Set Up Big Query Table*******************"); - setupBigQueryTable(bigQueryTableOptions.getProject(), - bigQueryTableOptions.getBigQueryDataset(), - bigQueryTableOptions.getBigQueryTable(), - bigQueryTableOptions.getBigQuerySchema()); - pendingMessages.add("The BigQuery table has been set up for this example: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - } - } - - /** - * Tears down external resources that can be deleted upon the example's completion. 
- */ - private void tearDown() { - pendingMessages.add("*************************Tear Down*************************"); - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - try { - deletePubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been deleted: " - + pubsubOptions.getPubsubTopic()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub topic : " - + pubsubOptions.getPubsubTopic()); - } - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - try { - deletePubsubSubscription(pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been deleted: " - + pubsubOptions.getPubsubSubscription()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub subscription : " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("The BigQuery table might contain the example's output, " - + "and it is not deleted automatically: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - pendingMessages.add("Please go to the Developers Console to delete it manually." - + " Otherwise, you may be charged for its usage."); - } - } - - /** - * Returns a BigQuery client builder using the specified {@link BigQueryOptions}. 
- */ - private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { - return new Bigquery.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - /** - * Returns a Pubsub client builder using the specified {@link PubsubOptions}. - */ - private static Pubsub.Builder newPubsubClient(PubsubOptions options) { - return new Pubsub.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setRootUrl(options.getPubsubRootUrl()) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - private static HttpRequestInitializer chainHttpRequestInitializer( - Credentials credential, HttpRequestInitializer httpRequestInitializer) { - if (credential == null) { - return new ChainingHttpRequestInitializer( - new NullCredentialInitializer(), httpRequestInitializer); - } else { - return new ChainingHttpRequestInitializer( - new HttpCredentialsAdapter(credential), - httpRequestInitializer); - } - } - - private void setupBigQueryTable(String projectId, String datasetId, String tableId, - TableSchema schema) throws IOException { - if (bigQueryClient == null) { - bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build(); - } - - Datasets datasetService = bigQueryClient.datasets(); - if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) { - Dataset newDataset = new Dataset().setDatasetReference( - new 
DatasetReference().setProjectId(projectId).setDatasetId(datasetId)); - datasetService.insert(projectId, newDataset).execute(); - } - - Tables tableService = bigQueryClient.tables(); - Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId)); - if (table == null) { - Table newTable = new Table().setSchema(schema).setTableReference( - new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId)); - tableService.insert(projectId, datasetId, newTable).execute(); - } else if (!table.getSchema().equals(schema)) { - throw new RuntimeException( - "Table exists and schemas do not match, expecting: " + schema.toPrettyString() - + ", actual: " + table.getSchema().toPrettyString()); - } - } - - private void setupPubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) { - pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute(); - } - } - - private void setupPubsubSubscription(String topic, String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) { - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); - pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub topic. 
- * - * @throws IOException if there is a problem deleting the Pub/Sub topic - */ - private void deletePubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) { - pubsubClient.projects().topics().delete(topic).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub subscription. - * - * @throws IOException if there is a problem deleting the Pub/Sub subscription - */ - private void deletePubsubSubscription(String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) { - pubsubClient.projects().subscriptions().delete(subscription).execute(); - } - } - - /** - * Waits for the pipeline to finish and cancels it before the program exists. - */ - public void waitToFinish(PipelineResult result) { - pipelinesToCancel.add(result); - if (!options.as(ExampleOptions.class).getKeepJobsRunning()) { - addShutdownHook(pipelinesToCancel); - } - try { - result.waitUntilFinish(); - } catch (UnsupportedOperationException e) { - // Do nothing if the given PipelineResult doesn't support waitUntilFinish(), - // such as EvaluationResults returned by DirectRunner. 
- tearDown(); - printPendingMessages(); - } catch (Exception e) { - throw new RuntimeException("Failed to wait the pipeline until finish: " + result); - } - } - - private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } - - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println("Failed to verify the cancellation for job: " + pipelineResult); - } - } - } - }); - } - - private void printPendingMessages() { - System.out.println(); - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - for (String message : pendingMessages) { - System.out.println(message); - } - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - } - - private static T executeNullIfNotFound( - AbstractGoogleClientRequest request) throws IOException { - try { - return request.execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == SC_NOT_FOUND) { - return null; - } else { - throw e; - } - } - } -} diff --git 
a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java deleted file mode 100644 index c7296162b6..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import static com.google.common.base.MoreObjects.firstNonNull; - -import javax.annotation.Nullable; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; - -/** - * A {@link DoFn} that writes elements to files with names deterministically derived from the lower - * and upper bounds of their key (an {@link IntervalWindow}). - * - *

This is test utility code, not for end-users, so examples can be focused on their primary - * lessons. - */ -public class WriteOneFilePerWindow extends PTransform, PDone> { - private static final DateTimeFormatter FORMATTER = ISODateTimeFormat.hourMinute(); - private String filenamePrefix; - @Nullable - private Integer numShards; - - public WriteOneFilePerWindow(String filenamePrefix, Integer numShards) { - this.filenamePrefix = filenamePrefix; - this.numShards = numShards; - } - - @Override - public PDone expand(PCollection input) { - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - TextIO.Write write = - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites(); - if (numShards != null) { - write = write.withNumShards(numShards); - } - return input.apply(write); - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - public static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId baseFilename; - - public PerWindowFiles(ResourceId baseFilename) { - this.baseFilename = baseFilename; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String prefix = - baseFilename.isDirectory() ? 
"" : firstNonNull(baseFilename.getFilename(), ""); - return String.format("%s-%s-%s", - prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return baseFilename - .getCurrentDirectory() - .resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java deleted file mode 100644 index a286811293..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/GameStats.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import ${package}.common.ExampleUtils; -import ${package}.complete.game.utils.WriteWindowedToBigQuery; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.Mean; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.Values; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Sessions; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import 
org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.DateTimeZone; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is the fourth in a series of four pipelines that tell a story in a 'gaming' - * domain, following {@link UserScore}, {@link HourlyTeamScore}, and {@link LeaderBoard}. - * New concepts: session windows and finding session duration; use of both - * singleton and non-singleton side inputs. - * - *

This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business - * intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user - * score sum for a window, and uses that information to identify likely spammers/robots. (The robots - * have a higher click rate than the human users). The 'robot' users are then filtered out when - * calculating the team scores. - * - *

Additionally, user sessions are tracked: that is, we find bursts of user activity using - * session windows. Then, the mean session duration information is recorded in the context of - * subsequent fixed windowing. (This could be used to tell us what games are giving us greater - * user retention). - * - *

Run {@code org.apache.beam.examples.complete.game.injector.Injector} to generate - * pubsub data for this pipeline. The {@code Injector} documentation provides more detail. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * 
- * - *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should - * be the same topic to which the Injector is publishing. - */ -public class GameStats extends LeaderBoard { - - private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - - /** - * Filter out all but those users with a high clickrate, which we will consider as 'spammy' uesrs. - * We do this by finding the mean total score per user, then using that information as a side - * input to filter out all but those user scores that are larger than - * {@code (mean * SCORE_WEIGHT)}. - */ - // [START DocInclude_AbuseDetect] - public static class CalculateSpammyUsers - extends PTransform>, PCollection>> { - private static final Logger LOG = LoggerFactory.getLogger(CalculateSpammyUsers.class); - private static final double SCORE_WEIGHT = 2.5; - - @Override - public PCollection> expand(PCollection> userScores) { - - // Get the sum of scores for each user. - PCollection> sumScores = userScores - .apply("UserSum", Sum.integersPerKey()); - - // Extract the score from each element, and use it to find the global mean. - final PCollectionView globalMeanScore = sumScores.apply(Values.create()) - .apply(Mean.globally().asSingletonView()); - - // Filter the user sums using the global mean. 
- PCollection> filtered = sumScores - .apply("ProcessAndFilter", ParDo - // use the derived mean total score as a side input - .of(new DoFn, KV>() { - private final Counter numSpammerUsers = Metrics.counter("main", "SpammerUsers"); - @ProcessElement - public void processElement(ProcessContext c) { - Integer score = c.element().getValue(); - Double gmc = c.sideInput(globalMeanScore); - if (score > (gmc * SCORE_WEIGHT)) { - LOG.info("user " + c.element().getKey() + " spammer score " + score - + " with mean " + gmc); - numSpammerUsers.inc(); - c.output(c.element()); - } - } - }).withSideInputs(globalMeanScore)); - return filtered; - } - } - // [END DocInclude_AbuseDetect] - - /** - * Calculate and output an element's session duration. - */ - private static class UserSessionInfoFn extends DoFn, Integer> { - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - IntervalWindow w = (IntervalWindow) window; - int duration = new Duration( - w.start(), w.end()).toPeriod().toStandardMinutes().getMinutes(); - c.output(duration); - } - } - - - /** - * Options supported by {@link GameStats}. 
- */ - interface Options extends LeaderBoard.Options { - @Description("Numeric value of fixed window duration for user analysis, in minutes") - @Default.Integer(60) - Integer getFixedWindowDuration(); - void setFixedWindowDuration(Integer value); - - @Description("Numeric value of gap between user sessions, in minutes") - @Default.Integer(5) - Integer getSessionGap(); - void setSessionGap(Integer value); - - @Description("Numeric value of fixed window for finding mean of user session duration, " - + "in minutes") - @Default.Integer(30) - Integer getUserActivityWindowDuration(); - void setUserActivityWindowDuration(Integer value); - - @Description("Prefix used for the BigQuery table names") - @Default.String("game_stats") - String getGameStatsTablePrefix(); - void setGameStatsTablePrefix(String value); - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write information about team score sums. - */ - protected static Map>> - configureWindowedWrite() { - Map>> tableConfigure = - new HashMap>>(); - tableConfigure.put( - "team", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteWindowedToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); - })); - tableConfigure.put( - "processing_time", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); - return tableConfigure; - } - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write information about mean user session time. 
- */ - protected static Map> - configureSessionWindowWrite() { - - Map> tableConfigure = - new HashMap>(); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); - })); - tableConfigure.put( - "mean_duration", - new WriteWindowedToBigQuery.FieldInfo("FLOAT", (c, w) -> c.element())); - return tableConfigure; - } - - - - public static void main(String[] args) throws Exception { - - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - // Enforce that this pipeline is always run in streaming mode. - options.setStreaming(true); - ExampleUtils exampleUtils = new ExampleUtils(options); - Pipeline pipeline = Pipeline.create(options); - - // Read Events from Pub/Sub using custom timestamps - PCollection rawEvents = pipeline - .apply(PubsubIO.readStrings() - .withTimestampAttribute(TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); - - // Extract username/score pairs from the event stream - PCollection> userEvents = - rawEvents.apply("ExtractUserScore", - MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - // Calculate the total score per user over fixed windows, and - // cumulative updates for late data. - final PCollectionView> spammersView = userEvents - .apply("FixedWindowsUser", Window.>into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - - // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate. - // These might be robots/spammers. - .apply("CalculateSpammyUsers", new CalculateSpammyUsers()) - // Derive a view from the collection of spammer users. It will be used as a side input - // in calculating the team score sums, below. 
- .apply("CreateSpammersView", View.asMap()); - - // [START DocInclude_FilterAndCalc] - // Calculate the total score per team over fixed windows, - // and emit cumulative updates for late data. Uses the side input derived above-- the set of - // suspected robots-- to filter out scores from those users from the sum. - // Write the results to BigQuery. - rawEvents - .apply("WindowIntoFixedWindows", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))) - // Filter out the detected spammer users, using the side input derived above. - .apply("FilterOutSpammers", ParDo - .of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - // If the user is not in the spammers Map, output the data element. - if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) { - c.output(c.element()); - } - } - }).withSideInputs(spammersView)) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - // [END DocInclude_FilterAndCalc] - // Write the result to BigQuery - .apply("WriteTeamSums", - new WriteWindowedToBigQuery>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_team", configureWindowedWrite())); - - - // [START DocInclude_SessionCalc] - // Detect user sessions-- that is, a burst of activity separated by a gap from further - // activity. Find and record the mean session lengths. - // This information could help the game designers track the changing user engagement - // as their set of games changes. - userEvents - .apply("WindowIntoSessions", Window.>into( - Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))) - .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)) - // For this use, we care only about the existence of the session, not any particular - // information aggregated over it, so the following is an efficient way to do that. 
- .apply(Combine.perKey(x -> 0)) - // Get the duration per session. - .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())) - // [END DocInclude_SessionCalc] - // [START DocInclude_Rewindow] - // Re-window to process groups of session sums according to when the sessions complete. - .apply("WindowToExtractSessionMean", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))) - // Find the mean session duration in each window. - .apply(Mean.globally().withoutDefaults()) - // Write this info to a BigQuery table. - .apply("WriteAvgSessionLength", - new WriteWindowedToBigQuery( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getGameStatsTablePrefix() + "_sessions", configureSessionWindowWrite())); - // [END DocInclude_Rewindow] - - - // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the - // command line. - PipelineResult result = pipeline.run(); - exampleUtils.waitToFinish(result); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java deleted file mode 100644 index e60af492e4..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/HourlyTeamScore.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import ${package}.complete.game.utils.WriteToText; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.WithTimestamps; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.joda.time.DateTimeZone; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * This class is the second in a series of four pipelines that tell a story in a 'gaming' - * domain, following {@link UserScore}. In addition to the concepts introduced in {@link UserScore}, - * new concepts include: windowing and element timestamps; use of {@code Filter.by()}. - * - *

This pipeline processes data collected from gaming events in batch, building on {@link - * UserScore} but using fixed windows. It calculates the sum of scores per team, for each window, - * optionally allowing specification of two timestamps before and after which data is filtered out. - * This allows a model where late data collected after the intended analysis window can be included, - * and any late-arriving data prior to the beginning of the analysis window can be removed as well. - * By using windowing and adding element timestamps, we can do finer-grained analysis than with the - * {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get - * results from plays at the beginning of the batch's time period until the batch is processed. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --tempLocation=YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --output=YOUR_OUTPUT_DIRECTORY
- *   (possibly options specific to your runner or permissions for your temp/output locations)
- * }
- * 
- * - *

Optionally include {@code --input} to specify the batch input file path. - * To indicate a time after which the data should be filtered out, include the - * {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data - * timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis. - * To indicate a time before which data should be filtered out, include the {@code --startMin} arg. - * If you're using the default input specified in {@link UserScore}, - * "gs://apache-beam-samples/game/gaming_data*.csv", then - * {@code --startMin=2015-11-16-16-10 --stopMin=2015-11-17-16-10} are good values. - */ -public class HourlyTeamScore extends UserScore { - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - private static DateTimeFormatter minFmt = - DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - - - /** - * Options supported by {@link HourlyTeamScore}. - */ - interface Options extends UserScore.Options { - - @Description("Numeric value of fixed window duration, in minutes") - @Default.Integer(60) - Integer getWindowDuration(); - void setWindowDuration(Integer value); - - @Description("String representation of the first minute after which to generate results," - + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." - + "Any input data timestamped prior to that minute won't be included in the sums.") - @Default.String("1970-01-01-00-00") - String getStartMin(); - void setStartMin(String value); - - @Description("String representation of the first minute for which to not generate results," - + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." 
- + "Any input data timestamped after that minute won't be included in the sums.") - @Default.String("2100-01-01-00-00") - String getStopMin(); - void setStopMin(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to text. This map - * is passed to the {@link WriteToText} constructor to write team score sums and - * includes information about window start time. - */ - protected static Map>> - configureOutput() { - Map>> config = - new HashMap>>(); - config.put("team", (c, w) -> c.element().getKey()); - config.put("total_score", (c, w) -> c.element().getValue()); - config.put( - "window_start", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); - }); - return config; - } - - - /** - * Run a batch pipeline to do windowed analysis of the data. - */ - // [START DocInclude_HTSMain] - public static void main(String[] args) throws Exception { - // Begin constructing a pipeline configured by commandline flags. - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - Pipeline pipeline = Pipeline.create(options); - - final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin())); - final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin())); - - // Read 'gaming' events from a text file. - pipeline.apply(TextIO.read().from(options.getInput())) - // Parse the incoming data. - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - - // Filter out data before and after the given times so that it is not included - // in the calculations. As we collect data in batches (say, by day), the batch for the day - // that we want to analyze could potentially include some late-arriving data from the previous - // day. If so, we want to weed it out. 
Similarly, if we include data from the following day - // (to scoop up late-arriving events from the day we're analyzing), we need to weed out events - // that fall after the time period we want to analyze. - // [START DocInclude_HTSFilters] - .apply("FilterStartTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) - .apply("FilterEndTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() < stopMinTimestamp.getMillis())) - // [END DocInclude_HTSFilters] - - // [START DocInclude_HTSAddTsAndWindow] - // Add an element timestamp based on the event log, and apply fixed windowing. - .apply("AddEventTimestamps", - WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp()))) - .apply("FixedWindowsTeam", Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowDuration())))) - // [END DocInclude_HTSAddTsAndWindow] - - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")) - .apply("WriteTeamScoreSums", - new WriteToText>( - options.getOutput(), - configureOutput(), - true)); - - pipeline.run().waitUntilFinish(); - } - // [END DocInclude_HTSMain] - -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java deleted file mode 100644 index 4f0ee28128..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/LeaderBoard.java +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import com.google.common.annotations.VisibleForTesting; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import ${package}.common.ExampleOptions; -import ${package}.common.ExampleUtils; -import ${package}.complete.game.utils.WriteToBigQuery; -import ${package}.complete.game.utils.WriteWindowedToBigQuery; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.StreamingOptions; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; -import org.apache.beam.sdk.transforms.windowing.AfterWatermark; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.GlobalWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.Repeatedly; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; 
-import org.apache.beam.sdk.values.PCollection; -import org.joda.time.DateTimeZone; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * This class is the third in a series of four pipelines that tell a story in a 'gaming' domain, - * following {@link UserScore} and {@link HourlyTeamScore}. Concepts include: processing unbounded - * data using fixed windows; use of custom timestamps and event-time processing; generation of - * early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late- - * arriving data. - * - *

This pipeline processes an unbounded stream of 'game events'. The calculation of the team - * scores uses fixed windowing based on event time (the time of the game play event), not - * processing time (the time that an event is processed by the pipeline). The pipeline calculates - * the sum of scores per team, for each window. By default, the team scores are calculated using - * one-hour windows. - * - *

In contrast-- to demo another windowing option-- the user scores are calculated using a - * global window, which periodically (every ten minutes) emits cumulative user score sums. - * - *

In contrast to the previous pipelines in the series, which used static, finite input data, - * here we're using an unbounded data source, which lets us provide speculative results, and allows - * handling of late data, at much lower latency. We can use the early/speculative results to keep a - * 'leaderboard' updated in near-realtime. Our handling of late data lets us generate correct - * results, e.g. for 'team prizes'. We're now outputting window results as they're - * calculated, giving us much lower latency than with the previous batch examples. - * - *

Run {@code injector.Injector} to generate pubsub data for this pipeline. The Injector - * documentation provides more detail on how to do this. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * 
- * - *

The BigQuery dataset you specify must already exist. The PubSub topic you specify should be - * the same topic to which the Injector is publishing. - */ -public class LeaderBoard extends HourlyTeamScore { - - private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - static final Duration FIVE_MINUTES = Duration.standardMinutes(5); - static final Duration TEN_MINUTES = Duration.standardMinutes(10); - - - /** - * Options supported by {@link LeaderBoard}. - */ - interface Options extends HourlyTeamScore.Options, ExampleOptions, StreamingOptions { - - @Description("BigQuery Dataset to write tables to. Must already exist.") - @Validation.Required - String getDataset(); - void setDataset(String value); - - @Description("Pub/Sub topic to read from") - @Validation.Required - String getTopic(); - void setTopic(String value); - - @Description("Numeric value of fixed window duration for team analysis, in minutes") - @Default.Integer(60) - Integer getTeamWindowDuration(); - void setTeamWindowDuration(Integer value); - - @Description("Numeric value of allowed data lateness, in minutes") - @Default.Integer(120) - Integer getAllowedLateness(); - void setAllowedLateness(Integer value); - - @Description("Prefix used for the BigQuery table names") - @Default.String("leaderboard") - String getLeaderBoardTableName(); - void setLeaderBoardTableName(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write team score sums and includes event timing information. 
- */ - protected static Map>> - configureWindowedTableWrite() { - - Map>> tableConfigure = - new HashMap>>(); - tableConfigure.put( - "team", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteWindowedToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); - tableConfigure.put( - "window_start", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", - (c, w) -> { - IntervalWindow window = (IntervalWindow) w; - return fmt.print(window.start()); - })); - tableConfigure.put( - "processing_time", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); - tableConfigure.put( - "timing", - new WriteWindowedToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.pane().getTiming().toString())); - return tableConfigure; - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is passed to the {@link WriteToBigQuery} constructor to write user score sums. - */ - protected static Map>> - configureBigQueryWrite() { - Map>> tableConfigure = - new HashMap>>(); - tableConfigure.put( - "user", - new WriteToBigQuery.FieldInfo>( - "STRING", (c, w) -> c.element().getKey())); - tableConfigure.put( - "total_score", - new WriteToBigQuery.FieldInfo>( - "INTEGER", (c, w) -> c.element().getValue())); - return tableConfigure; - } - - - /** - * Create a map of information that describes how to write pipeline output to BigQuery. This map - * is used to write user score sums. 
- */ - protected static Map>> - configureGlobalWindowBigQueryWrite() { - - Map>> tableConfigure = - configureBigQueryWrite(); - tableConfigure.put( - "processing_time", - new WriteToBigQuery.FieldInfo>( - "STRING", (c, w) -> fmt.print(Instant.now()))); - return tableConfigure; - } - - - public static void main(String[] args) throws Exception { - - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - // Enforce that this pipeline is always run in streaming mode. - options.setStreaming(true); - ExampleUtils exampleUtils = new ExampleUtils(options); - Pipeline pipeline = Pipeline.create(options); - - // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub - // data elements, and parse the data. - PCollection gameEvents = pipeline - .apply(PubsubIO.readStrings() - .withTimestampAttribute(TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())); - - gameEvents - .apply( - "CalculateTeamScores", - new CalculateTeamScores( - Duration.standardMinutes(options.getTeamWindowDuration()), - Duration.standardMinutes(options.getAllowedLateness()))) - // Write the results to BigQuery. - .apply( - "WriteTeamScoreSums", - new WriteWindowedToBigQuery>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getLeaderBoardTableName() + "_team", - configureWindowedTableWrite())); - gameEvents - .apply( - "CalculateUserScores", - new CalculateUserScores(Duration.standardMinutes(options.getAllowedLateness()))) - // Write the results to BigQuery. - .apply( - "WriteUserScoreSums", - new WriteToBigQuery>( - options.as(GcpOptions.class).getProject(), - options.getDataset(), - options.getLeaderBoardTableName() + "_user", - configureGlobalWindowBigQueryWrite())); - - // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the - // command line. 
- PipelineResult result = pipeline.run(); - exampleUtils.waitToFinish(result); - } - - /** - * Calculates scores for each team within the configured window duration. - */ - // [START DocInclude_WindowAndTrigger] - // Extract team/score pairs from the event stream, using hour-long windows by default. - @VisibleForTesting - static class CalculateTeamScores - extends PTransform, PCollection>> { - private final Duration teamWindowDuration; - private final Duration allowedLateness; - - CalculateTeamScores(Duration teamWindowDuration, Duration allowedLateness) { - this.teamWindowDuration = teamWindowDuration; - this.allowedLateness = allowedLateness; - } - - @Override - public PCollection> expand(PCollection infos) { - return infos.apply("LeaderboardTeamFixedWindows", - Window.into(FixedWindows.of(teamWindowDuration)) - // We will get early (speculative) results as well as cumulative - // processing of late data. - .triggering(AfterWatermark.pastEndOfWindow() - .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(FIVE_MINUTES)) - .withLateFirings(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(TEN_MINUTES))) - .withAllowedLateness(allowedLateness) - .accumulatingFiredPanes()) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")); - } - } - // [END DocInclude_WindowAndTrigger] - - // [START DocInclude_ProcTimeTrigger] - /** - * Extract user/score pairs from the event stream using processing time, via global windowing. - * Get periodic updates on all users' running scores. 
- */ - @VisibleForTesting - static class CalculateUserScores - extends PTransform, PCollection>> { - private final Duration allowedLateness; - - CalculateUserScores(Duration allowedLateness) { - this.allowedLateness = allowedLateness; - } - - @Override - public PCollection> expand(PCollection input) { - return input.apply("LeaderboardUserGlobalWindow", - Window.into(new GlobalWindows()) - // Get periodic results every ten minutes. - .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(TEN_MINUTES))) - .accumulatingFiredPanes() - .withAllowedLateness(allowedLateness)) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")); - } - } - // [END DocInclude_ProcTimeTrigger] -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java deleted file mode 100644 index c693614c57..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/UserScore.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game; - -import java.util.HashMap; -import java.util.Map; -import org.apache.avro.reflect.Nullable; -import ${package}.complete.game.utils.WriteToText; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.coders.DefaultCoder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is the first in a series of four pipelines that tell a story in a 'gaming' domain. - * Concepts: batch processing, reading input from text files, writing output to - * text files, using standalone DoFns, use of the sum per key transform, and use of - * Java 8 lambda syntax. - * - *

In this gaming scenario, many users play, as members of different teams, over the course of a - * day, and their actions are logged for processing. Some of the logged game events may be late- - * arriving, if users play on mobile devices and go transiently offline for a period. - * - *

This pipeline does batch processing of data collected from gaming events. It calculates the - * sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The - * batch processing will not include any late data that arrives after the day's cutoff point. - * - *

To execute this pipeline, specify the pipeline configuration like this: - *

{@code
- *   --tempLocation=YOUR_TEMP_DIRECTORY
- *   --runner=YOUR_RUNNER
- *   --output=YOUR_OUTPUT_DIRECTORY
- *   (possibly options specific to your runner or permissions for your temp/output locations)
- * }
- * 
- * - *

Optionally include the --input argument to specify a batch input file. - * See the --input default value for example batch data file, or use {@code injector.Injector} to - * generate your own batch data. - */ -public class UserScore { - - /** - * Class to hold info about a game event. - */ - @DefaultCoder(AvroCoder.class) - static class GameActionInfo { - @Nullable String user; - @Nullable String team; - @Nullable Integer score; - @Nullable Long timestamp; - - public GameActionInfo() {} - - public GameActionInfo(String user, String team, Integer score, Long timestamp) { - this.user = user; - this.team = team; - this.score = score; - this.timestamp = timestamp; - } - - public String getUser() { - return this.user; - } - public String getTeam() { - return this.team; - } - public Integer getScore() { - return this.score; - } - public String getKey(String keyname) { - if (keyname.equals("team")) { - return this.team; - } else { // return username as default - return this.user; - } - } - public Long getTimestamp() { - return this.timestamp; - } - } - - - /** - * Parses the raw game event info into GameActionInfo objects. Each event line has the following - * format: username,teamname,score,timestamp_in_ms,readable_time - * e.g.: - * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 - * The human-readable time string is not used here. - */ - static class ParseEventFn extends DoFn { - - // Log and count parse errors. 
- private static final Logger LOG = LoggerFactory.getLogger(ParseEventFn.class); - private final Counter numParseErrors = Metrics.counter("main", "ParseErrors"); - - @ProcessElement - public void processElement(ProcessContext c) { - String[] components = c.element().split(","); - try { - String user = components[0].trim(); - String team = components[1].trim(); - Integer score = Integer.parseInt(components[2].trim()); - Long timestamp = Long.parseLong(components[3].trim()); - GameActionInfo gInfo = new GameActionInfo(user, team, score, timestamp); - c.output(gInfo); - } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) { - numParseErrors.inc(); - LOG.info("Parse error on " + c.element() + ", " + e.getMessage()); - } - } - } - - /** - * A transform to extract key/score information from GameActionInfo, and sum the scores. The - * constructor arg determines whether 'team' or 'user' info is extracted. - */ - // [START DocInclude_USExtractXform] - public static class ExtractAndSumScore - extends PTransform, PCollection>> { - - private final String field; - - ExtractAndSumScore(String field) { - this.field = field; - } - - @Override - public PCollection> expand( - PCollection gameInfo) { - - return gameInfo - .apply(MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore()))) - .apply(Sum.integersPerKey()); - } - } - // [END DocInclude_USExtractXform] - - - /** - * Options supported by {@link UserScore}. - */ - public interface Options extends PipelineOptions { - - @Description("Path to the data file(s) containing game data.") - // The default maps to two large Google Cloud Storage files (each ~12GB) holding two subsequent - // day's worth (roughly) of data. - @Default.String("gs://apache-beam-samples/game/gaming_data*.csv") - String getInput(); - void setInput(String value); - - // Set this required option to specify where to write the output. 
- @Description("Path of the file to write to.") - @Validation.Required - String getOutput(); - void setOutput(String value); - } - - /** - * Create a map of information that describes how to write pipeline output to text. This map - * is passed to the {@link WriteToText} constructor to write user score sums. - */ - protected static Map>> - configureOutput() { - Map>> config = - new HashMap>>(); - config.put("user", (c, w) -> c.element().getKey()); - config.put("total_score", (c, w) -> c.element().getValue()); - return config; - } - - /** - * Run a batch pipeline. - */ - // [START DocInclude_USMain] - public static void main(String[] args) throws Exception { - // Begin constructing a pipeline configured by commandline flags. - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - Pipeline pipeline = Pipeline.create(options); - - // Read events from a text file and parse them. - pipeline - .apply(TextIO.read().from(options.getInput())) - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")) - .apply( - "WriteUserScoreSums", - new WriteToText>( - options.getOutput(), - configureOutput(), - false)); - - // Run the batch pipeline. - pipeline.run().waitUntilFinish(); - } - // [END DocInclude_USMain] -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java deleted file mode 100644 index 4814ffb66f..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/Injector.java +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.injector; - -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.PublishRequest; -import com.google.api.services.pubsub.model.PubsubMessage; -import com.google.common.collect.ImmutableMap; -import java.io.BufferedOutputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; -import java.util.TimeZone; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - - -/** - * This is a generator that simulates usage data from a mobile game, and either publishes the data - * to a pubsub topic or writes it to a file. - * - *

The general model used by the generator is the following. There is a set of teams with team - * members. Each member is scoring points for their team. After some period, a team will dissolve - * and a new one will be created in its place. There is also a set of 'Robots', or spammer users. - * They hop from team to team. The robots are set to have a higher 'click rate' (generate more - * events) than the regular team members. - * - *

Each generated line of data has the following form: - * username,teamname,score,timestamp_in_ms,readable_time - * e.g.: - * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 - * - *

The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if - * specified. It takes the following arguments: - * {@code Injector project-name (topic-name|none) (filename|none)}. - * - *

To run the Injector in the mode where it publishes to PubSub, you will need to authenticate - * locally using project-based service account credentials to avoid running over PubSub - * quota. - * See https://developers.google.com/identity/protocols/application-default-credentials - * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS - * environment variable to point to your downloaded service account credentials before starting the - * program, e.g.: - * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}. - * If you do not do this, then your injector will only run for a few minutes on your - * 'user account' credentials before you will start to see quota error messages like: - * "Request throttled due to user QPS limit being reached", and see this exception: - * ".com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests". - * Once you've set up your credentials, run the Injector like this": - *

{@code
- * Injector   none
- * }
- * 
- * The pubsub topic will be created if it does not exist. - * - *

To run the injector in write-to-file-mode, set the topic name to "none" and specify the - * filename: - *

{@code
- * Injector  none 
- * }
- * 
- */ -class Injector { - private static Pubsub pubsub; - private static Random random = new Random(); - private static String topic; - private static String project; - private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; - - // QPS ranges from 800 to 1000. - private static final int MIN_QPS = 800; - private static final int QPS_RANGE = 200; - // How long to sleep, in ms, between creation of the threads that make API requests to PubSub. - private static final int THREAD_SLEEP_MS = 500; - - // Lists used to generate random team names. - private static final ArrayList COLORS = - new ArrayList(Arrays.asList( - "Magenta", "AliceBlue", "Almond", "Amaranth", "Amber", - "Amethyst", "AndroidGreen", "AntiqueBrass", "Fuchsia", "Ruby", "AppleGreen", - "Apricot", "Aqua", "ArmyGreen", "Asparagus", "Auburn", "Azure", "Banana", - "Beige", "Bisque", "BarnRed", "BattleshipGrey")); - - private static final ArrayList ANIMALS = - new ArrayList(Arrays.asList( - "Echidna", "Koala", "Wombat", "Marmot", "Quokka", "Kangaroo", "Dingo", "Numbat", "Emu", - "Wallaby", "CaneToad", "Bilby", "Possum", "Cassowary", "Kookaburra", "Platypus", - "Bandicoot", "Cockatoo", "Antechinus")); - - // The list of live teams. - private static ArrayList liveTeams = new ArrayList(); - - private static DateTimeFormatter fmt = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - - - // The total number of robots in the system. - private static final int NUM_ROBOTS = 20; - // Determines the chance that a team will have a robot team member. 
- private static final int ROBOT_PROBABILITY = 3; - private static final int NUM_LIVE_TEAMS = 15; - private static final int BASE_MEMBERS_PER_TEAM = 5; - private static final int MEMBERS_PER_TEAM = 15; - private static final int MAX_SCORE = 20; - private static final int LATE_DATA_RATE = 5 * 60 * 2; // Every 10 minutes - private static final int BASE_DELAY_IN_MILLIS = 5 * 60 * 1000; // 5-10 minute delay - private static final int FUZZY_DELAY_IN_MILLIS = 5 * 60 * 1000; - - // The minimum time a 'team' can live. - private static final int BASE_TEAM_EXPIRATION_TIME_IN_MINS = 20; - private static final int TEAM_EXPIRATION_TIME_IN_MINS = 20; - - - /** - * A class for holding team info: the name of the team, when it started, - * and the current team members. Teams may but need not include one robot team member. - */ - private static class TeamInfo { - String teamName; - long startTimeInMillis; - int expirationPeriod; - // The team might but need not include 1 robot. Will be non-null if so. - String robot; - int numMembers; - - private TeamInfo(String teamName, long startTimeInMillis, String robot) { - this.teamName = teamName; - this.startTimeInMillis = startTimeInMillis; - // How long until this team is dissolved. - this.expirationPeriod = random.nextInt(TEAM_EXPIRATION_TIME_IN_MINS) - + BASE_TEAM_EXPIRATION_TIME_IN_MINS; - this.robot = robot; - // Determine the number of team members. 
- numMembers = random.nextInt(MEMBERS_PER_TEAM) + BASE_MEMBERS_PER_TEAM; - } - - String getTeamName() { - return teamName; - } - String getRobot() { - return robot; - } - - long getStartTimeInMillis() { - return startTimeInMillis; - } - long getEndTimeInMillis() { - return startTimeInMillis + (expirationPeriod * 60 * 1000); - } - String getRandomUser() { - int userNum = random.nextInt(numMembers); - return "user" + userNum + "_" + teamName; - } - - int numMembers() { - return numMembers; - } - - @Override - public String toString() { - return "(" + teamName + ", num members: " + numMembers() + ", starting at: " - + startTimeInMillis + ", expires in: " + expirationPeriod + ", robot: " + robot + ")"; - } - } - - /** Utility to grab a random element from an array of Strings. */ - private static String randomElement(ArrayList list) { - int index = random.nextInt(list.size()); - return list.get(index); - } - - /** - * Get and return a random team. If the selected team is too old w.r.t its expiration, remove - * it, replacing it with a new team. - */ - private static TeamInfo randomTeam(ArrayList list) { - int index = random.nextInt(list.size()); - TeamInfo team = list.get(index); - // If the selected team is expired, remove it and return a new team. - long currTime = System.currentTimeMillis(); - if ((team.getEndTimeInMillis() < currTime) || team.numMembers() == 0) { - System.out.println("\nteam " + team + " is too old; replacing."); - System.out.println("start time: " + team.getStartTimeInMillis() - + ", end time: " + team.getEndTimeInMillis() - + ", current time:" + currTime); - removeTeam(index); - // Add a new team in its stead. - return (addLiveTeam()); - } else { - return team; - } - } - - /** - * Create and add a team. Possibly add a robot to the team. - */ - private static synchronized TeamInfo addLiveTeam() { - String teamName = randomElement(COLORS) + randomElement(ANIMALS); - String robot = null; - // Decide if we want to add a robot to the team. 
- if (random.nextInt(ROBOT_PROBABILITY) == 0) { - robot = "Robot-" + random.nextInt(NUM_ROBOTS); - } - // Create the new team. - TeamInfo newTeam = new TeamInfo(teamName, System.currentTimeMillis(), robot); - liveTeams.add(newTeam); - System.out.println("[+" + newTeam + "]"); - return newTeam; - } - - /** - * Remove a specific team. - */ - private static synchronized void removeTeam(int teamIndex) { - TeamInfo removedTeam = liveTeams.remove(teamIndex); - System.out.println("[-" + removedTeam + "]"); - } - - /** Generate a user gaming event. */ - private static String generateEvent(Long currTime, int delayInMillis) { - TeamInfo team = randomTeam(liveTeams); - String teamName = team.getTeamName(); - String user; - final int parseErrorRate = 900000; - - String robot = team.getRobot(); - // If the team has an associated robot team member... - if (robot != null) { - // Then use that robot for the message with some probability. - // Set this probability to higher than that used to select any of the 'regular' team - // members, so that if there is a robot on the team, it has a higher click rate. - if (random.nextInt(team.numMembers() / 2) == 0) { - user = robot; - } else { - user = team.getRandomUser(); - } - } else { // No robot. - user = team.getRandomUser(); - } - String event = user + "," + teamName + "," + random.nextInt(MAX_SCORE); - // Randomly introduce occasional parse errors. - if (random.nextInt(parseErrorRate) == 0) { - System.out.println("Introducing a parse error."); - event = "THIS LINE REPRESENTS CORRUPT DATA AND WILL CAUSE A PARSE ERROR"; - } - return addTimeInfoToEvent(event, currTime, delayInMillis); - } - - /** - * Add time info to a generated gaming event. - */ - private static String addTimeInfoToEvent(String message, Long currTime, int delayInMillis) { - String eventTimeString = - Long.toString((currTime - delayInMillis) / 1000 * 1000); - // Add a (redundant) 'human-readable' date string to make the data semantics more clear. 
- String dateString = fmt.print(currTime); - message = message + "," + eventTimeString + "," + dateString; - return message; - } - - /** - * Publish 'numMessages' arbitrary events from live users with the provided delay, to a - * PubSub topic. - */ - public static void publishData(int numMessages, int delayInMillis) - throws IOException { - List pubsubMessages = new ArrayList<>(); - - for (int i = 0; i < Math.max(1, numMessages); i++) { - Long currTime = System.currentTimeMillis(); - String message = generateEvent(currTime, delayInMillis); - PubsubMessage pubsubMessage = new PubsubMessage() - .encodeData(message.getBytes("UTF-8")); - pubsubMessage.setAttributes( - ImmutableMap.of(TIMESTAMP_ATTRIBUTE, - Long.toString((currTime - delayInMillis) / 1000 * 1000))); - if (delayInMillis != 0) { - System.out.println(pubsubMessage.getAttributes()); - System.out.println("late data for: " + message); - } - pubsubMessages.add(pubsubMessage); - } - - PublishRequest publishRequest = new PublishRequest(); - publishRequest.setMessages(pubsubMessages); - pubsub.projects().topics().publish(topic, publishRequest).execute(); - } - - /** - * Publish generated events to a file. 
- */ - public static void publishDataToFile(String fileName, int numMessages, int delayInMillis) - throws IOException { - PrintWriter out = new PrintWriter(new OutputStreamWriter( - new BufferedOutputStream(new FileOutputStream(fileName, true)), "UTF-8")); - - try { - for (int i = 0; i < Math.max(1, numMessages); i++) { - Long currTime = System.currentTimeMillis(); - String message = generateEvent(currTime, delayInMillis); - out.println(message); - } - } catch (Exception e) { - e.printStackTrace(); - } finally { - if (out != null) { - out.flush(); - out.close(); - } - } - } - - - public static void main(String[] args) throws IOException, InterruptedException { - if (args.length < 3) { - System.out.println("Usage: Injector project-name (topic-name|none) (filename|none)"); - System.exit(1); - } - boolean writeToFile = false; - boolean writeToPubsub = true; - project = args[0]; - String topicName = args[1]; - String fileName = args[2]; - // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if - // specified; otherwise, it will try to write to a file. - if (topicName.equalsIgnoreCase("none")) { - writeToFile = true; - writeToPubsub = false; - } - if (writeToPubsub) { - // Create the PubSub client. - pubsub = InjectorUtils.getClient(); - // Create the PubSub topic as necessary. - topic = InjectorUtils.getFullyQualifiedTopicName(project, topicName); - InjectorUtils.createTopic(pubsub, topic); - System.out.println("Injecting to topic: " + topic); - } else { - if (fileName.equalsIgnoreCase("none")) { - System.out.println("Filename not specified."); - System.exit(1); - } - System.out.println("Writing to file: " + fileName); - } - System.out.println("Starting Injector"); - - // Start off with some random live teams. - while (liveTeams.size() < NUM_LIVE_TEAMS) { - addLiveTeam(); - } - - // Publish messages at a rate determined by the QPS and Thread sleep settings. 
- for (int i = 0; true; i++) { - if (Thread.activeCount() > 10) { - System.err.println("I'm falling behind!"); - } - - // Decide if this should be a batch of late data. - final int numMessages; - final int delayInMillis; - if (i % LATE_DATA_RATE == 0) { - // Insert delayed data for one user (one message only) - delayInMillis = BASE_DELAY_IN_MILLIS + random.nextInt(FUZZY_DELAY_IN_MILLIS); - numMessages = 1; - System.out.println("DELAY(" + delayInMillis + ", " + numMessages + ")"); - } else { - System.out.print("."); - delayInMillis = 0; - numMessages = MIN_QPS + random.nextInt(QPS_RANGE); - } - - if (writeToFile) { // Won't use threading for the file write. - publishDataToFile(fileName, numMessages, delayInMillis); - } else { // Write to PubSub. - // Start a thread to inject some data. - new Thread(){ - @Override - public void run() { - try { - publishData(numMessages, delayInMillis); - } catch (IOException e) { - System.err.println(e); - } - } - }.start(); - } - - // Wait before creating another injector thread. - Thread.sleep(THREAD_SLEEP_MS); - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java deleted file mode 100644 index 55e8c7a8c3..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/InjectorUtils.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.complete.game.injector; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.util.Utils; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.http.HttpStatusCodes; -import com.google.api.client.http.HttpTransport; -import com.google.api.client.json.JsonFactory; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.PubsubScopes; -import com.google.api.services.pubsub.model.Topic; -import java.io.IOException; - -class InjectorUtils { - - private static final String APP_NAME = "injector"; - - /** - * Builds a new Pubsub client and returns it. - */ - public static Pubsub getClient(final HttpTransport httpTransport, - final JsonFactory jsonFactory) - throws IOException { - checkNotNull(httpTransport); - checkNotNull(jsonFactory); - GoogleCredential credential = - GoogleCredential.getApplicationDefault(httpTransport, jsonFactory); - if (credential.createScopedRequired()) { - credential = credential.createScoped(PubsubScopes.all()); - } - if (credential.getClientAuthentication() != null) { - System.out.println("\n***Warning! 
You are not using service account credentials to " - + "authenticate.\nYou need to use service account credentials for this example," - + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run " - + "out of PubSub quota very quickly.\nSee " - + "https://developers.google.com/identity/protocols/application-default-credentials."); - System.exit(1); - } - HttpRequestInitializer initializer = - new RetryHttpInitializerWrapper(credential); - return new Pubsub.Builder(httpTransport, jsonFactory, initializer) - .setApplicationName(APP_NAME) - .build(); - } - - /** - * Builds a new Pubsub client with default HttpTransport and - * JsonFactory and returns it. - */ - public static Pubsub getClient() throws IOException { - return getClient(Utils.getDefaultTransport(), - Utils.getDefaultJsonFactory()); - } - - - /** - * Returns the fully qualified topic name for Pub/Sub. - */ - public static String getFullyQualifiedTopicName( - final String project, final String topic) { - return String.format("projects/%s/topics/%s", project, topic); - } - - /** - * Create a topic if it doesn't exist. 
- */ - public static void createTopic(Pubsub client, String fullTopicName) - throws IOException { - try { - client.projects().topics().get(fullTopicName).execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) { - Topic topic = client.projects().topics() - .create(fullTopicName, new Topic()) - .execute(); - System.out.printf("Topic %s was created.\n", topic.getName()); - } - } - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java deleted file mode 100644 index 5d0cc68763..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/injector/RetryHttpInitializerWrapper.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.injector; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.client.auth.oauth2.Credential; -import com.google.api.client.http.HttpBackOffIOExceptionHandler; -import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler; -import com.google.api.client.http.HttpRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.http.HttpResponse; -import com.google.api.client.http.HttpUnsuccessfulResponseHandler; -import com.google.api.client.util.ExponentialBackOff; -import com.google.api.client.util.Sleeper; -import java.io.IOException; -import java.util.logging.Logger; - -/** - * RetryHttpInitializerWrapper will automatically retry upon RPC - * failures, preserving the auto-refresh behavior of the Google - * Credentials. - */ -public class RetryHttpInitializerWrapper implements HttpRequestInitializer { - - /** - * A private logger. - */ - private static final Logger LOG = - Logger.getLogger(RetryHttpInitializerWrapper.class.getName()); - - /** - * One minutes in miliseconds. - */ - private static final int ONEMINITUES = 60000; - - /** - * Intercepts the request for filling in the "Authorization" - * header field, as well as recovering from certain unsuccessful - * error codes wherein the Credential must refresh its token for a - * retry. - */ - private final Credential wrappedCredential; - - /** - * A sleeper; you can replace it with a mock in your test. - */ - private final Sleeper sleeper; - - /** - * A constructor. - * - * @param wrappedCredential Credential which will be wrapped and - * used for providing auth header. - */ - public RetryHttpInitializerWrapper(final Credential wrappedCredential) { - this(wrappedCredential, Sleeper.DEFAULT); - } - - /** - * A protected constructor only for testing. - * - * @param wrappedCredential Credential which will be wrapped and - * used for providing auth header. 
- * @param sleeper Sleeper for easy testing. - */ - RetryHttpInitializerWrapper( - final Credential wrappedCredential, final Sleeper sleeper) { - this.wrappedCredential = checkNotNull(wrappedCredential); - this.sleeper = sleeper; - } - - /** - * Initializes the given request. - */ - @Override - public final void initialize(final HttpRequest request) { - request.setReadTimeout(2 * ONEMINITUES); // 2 minutes read timeout - final HttpUnsuccessfulResponseHandler backoffHandler = - new HttpBackOffUnsuccessfulResponseHandler( - new ExponentialBackOff()) - .setSleeper(sleeper); - request.setInterceptor(wrappedCredential); - request.setUnsuccessfulResponseHandler( - new HttpUnsuccessfulResponseHandler() { - @Override - public boolean handleResponse( - final HttpRequest request, - final HttpResponse response, - final boolean supportsRetry) throws IOException { - if (wrappedCredential.handleResponse( - request, response, supportsRetry)) { - // If credential decides it can handle it, - // the return code or message indicated - // something specific to authentication, - // and no backoff is desired. - return true; - } else if (backoffHandler.handleResponse( - request, response, supportsRetry)) { - // Otherwise, we defer to the judgement of - // our internal backoff handler. 
- LOG.info("Retrying " - + request.getUrl().toString()); - return true; - } else { - return false; - } - } - }); - request.setIOExceptionHandler( - new HttpBackOffIOExceptionHandler(new ExponentialBackOff()) - .setSleeper(sleeper)); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java deleted file mode 100644 index 984e958c50..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToBigQuery.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.utils; - -import com.google.api.services.bigquery.model.TableFieldSchema; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableRow; -import com.google.api.services.bigquery.model.TableSchema; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; - -/** - * Generate, format, and write BigQuery table row information. Use provided information about - * the field names and types, as well as lambda functions that describe how to generate their - * values. - */ -public class WriteToBigQuery - extends PTransform, PDone> { - - protected String projectId; - protected String datasetId; - protected String tableName; - protected Map> fieldInfo; - - public WriteToBigQuery() { - } - - public WriteToBigQuery( - String projectId, - String datasetId, - String tableName, - Map> fieldInfo) { - this.projectId = projectId; - this.datasetId = datasetId; - this.tableName = tableName; - this.fieldInfo = fieldInfo; - } - - /** - * A {@link Serializable} function from a {@link DoFn.ProcessContext} - * and {@link BoundedWindow} to the value for that field. - */ - public interface FieldFn extends Serializable { - Object apply(DoFn.ProcessContext context, BoundedWindow window); - } - - /** Define a class to hold information about output table field definitions. 
*/ - public static class FieldInfo implements Serializable { - // The BigQuery 'type' of the field - private String fieldType; - // A lambda function to generate the field value - private FieldFn fieldFn; - - public FieldInfo(String fieldType, - FieldFn fieldFn) { - this.fieldType = fieldType; - this.fieldFn = fieldFn; - } - - String getFieldType() { - return this.fieldType; - } - - FieldFn getFieldFn() { - return this.fieldFn; - } - } - /** Convert each key/score pair into a BigQuery TableRow as specified by fieldFn. */ - protected class BuildRowFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - - TableRow row = new TableRow(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - FieldFn fcn = fcnInfo.getFieldFn(); - row.set(key, fcn.apply(c, window)); - } - c.output(row); - } - } - - /** Build the output table schema. */ - protected TableSchema getSchema() { - List fields = new ArrayList<>(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - String bqType = fcnInfo.getFieldType(); - fields.add(new TableFieldSchema().setName(key).setType(bqType)); - } - return new TableSchema().setFields(fields); - } - - @Override - public PDone expand(PCollection teamAndScore) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply( - BigQueryIO.writeTableRows() - .to(getTable(projectId, datasetId, tableName)) - .withSchema(getSchema()) - .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(WriteDisposition.WRITE_APPEND)); - return PDone.in(teamAndScore.getPipeline()); - } - - /** Utility to construct an output table reference. 
*/ - static TableReference getTable(String projectId, String datasetId, String tableName) { - TableReference table = new TableReference(); - table.setDatasetId(datasetId); - table.setProjectId(projectId); - table.setTableId(tableName); - return table; - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java deleted file mode 100644 index 7d8d19f70d..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteToText.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.utils; - -import static com.google.common.base.Preconditions.checkArgument; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; -import java.util.stream.Collectors; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -/** - * Generate, format, and write rows. Use provided information about the field names and types, as - * well as lambda functions that describe how to generate their values. 
- */ -public class WriteToText - extends PTransform, PDone> { - - private static final DateTimeFormatter formatter = - DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") - .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); - - protected String filenamePrefix; - protected Map> fieldFn; - protected boolean windowed; - - public WriteToText() { - } - - public WriteToText( - String filenamePrefix, - Map> fieldFn, - boolean windowed) { - this.filenamePrefix = filenamePrefix; - this.fieldFn = fieldFn; - this.windowed = windowed; - } - - /** - * A {@link Serializable} function from a {@link DoFn.ProcessContext} - * and {@link BoundedWindow} to the value for that field. - */ - public interface FieldFn extends Serializable { - Object apply(DoFn.ProcessContext context, BoundedWindow window); - } - - /** Convert each key/score pair into a row as specified by fieldFn. */ - protected class BuildRowFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - List fields = new ArrayList(); - for (Map.Entry> entry : fieldFn.entrySet()) { - String key = entry.getKey(); - FieldFn fcn = entry.getValue(); - fields.add(key + ": " + fcn.apply(c, window)); - } - String result = fields.stream().collect(Collectors.joining(", ")); - c.output(result); - } - } - - /** - * A {@link DoFn} that writes elements to files with names deterministically derived from the - * lower and upper bounds of their key (an {@link IntervalWindow}). - */ - protected class WriteOneFilePerWindow extends PTransform, PDone> { - - private final String filenamePrefix; - - public WriteOneFilePerWindow(String filenamePrefix) { - this.filenamePrefix = filenamePrefix; - } - - @Override - public PDone expand(PCollection input) { - // Verify that the input has a compatible window type. 
- checkArgument( - input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder()); - - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - - return input.apply( - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites() - .withNumShards(3)); - } - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - protected static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId prefix; - - public PerWindowFiles(ResourceId prefix) { - this.prefix = prefix; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String filePrefix = prefix.isDirectory() ? "" : prefix.getFilename(); - return String.format( - "%s-%s-%s", filePrefix, formatter.print(window.start()), formatter.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return prefix.getCurrentDirectory().resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } - - @Override - public PDone expand(PCollection teamAndScore) { - if (windowed) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(new 
WriteToText.WriteOneFilePerWindow(filenamePrefix)); - } else { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(TextIO.write().to(filenamePrefix)); - } - return PDone.in(teamAndScore.getPipeline()); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java deleted file mode 100644 index 6aef88706d..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/main/java/complete/game/utils/WriteWindowedToBigQuery.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game.utils; - -import com.google.api.services.bigquery.model.TableRow; -import java.util.Map; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; - -/** - * Generate, format, and write BigQuery table row information. Subclasses {@link WriteToBigQuery} - * to require windowing; so this subclass may be used for writes that require access to the - * context's window information. - */ -public class WriteWindowedToBigQuery - extends WriteToBigQuery { - - public WriteWindowedToBigQuery( - String projectId, String datasetId, String tableName, Map> fieldInfo) { - super(projectId, datasetId, tableName, fieldInfo); - } - - /** Convert each key/score pair into a BigQuery TableRow. 
*/ - protected class BuildRowFn extends DoFn { - @ProcessElement - public void processElement(ProcessContext c, BoundedWindow window) { - - TableRow row = new TableRow(); - for (Map.Entry> entry : fieldInfo.entrySet()) { - String key = entry.getKey(); - FieldInfo fcnInfo = entry.getValue(); - row.set(key, fcnInfo.getFieldFn().apply(c, window)); - } - c.output(row); - } - } - - @Override - public PDone expand(PCollection teamAndScore) { - teamAndScore - .apply("ConvertToRow", ParDo.of(new BuildRowFn())) - .apply(BigQueryIO.writeTableRows() - .to(getTable(projectId, datasetId, tableName)) - .withSchema(getSchema()) - .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED) - .withWriteDisposition(WriteDisposition.WRITE_APPEND)); - return PDone.in(teamAndScore.getPipeline()); - } - -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java deleted file mode 100644 index 155242d996..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.io.Files; -import java.io.File; -import java.nio.charset.StandardCharsets; -import ${package}.DebuggingWordCount.WordCountOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DebuggingWordCount}. - */ -@RunWith(JUnit4.class) -public class DebuggingWordCountTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - @Test - public void testDebuggingWordCount() throws Exception { - File inputFile = tmpFolder.newFile(); - File outputFile = tmpFolder.newFile(); - Files.write( - "stomach secret Flourish message Flourish here Flourish", - inputFile, - StandardCharsets.UTF_8); - WordCountOptions options = - TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(inputFile.getAbsolutePath()); - options.setOutput(outputFile.getAbsolutePath()); - DebuggingWordCount.main(TestPipeline.convertToArgs(options)); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java deleted file mode 100644 index af347c1c0a..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/MinimalWordCountJava8Test.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.io.Serializable; -import java.nio.channels.FileChannel; -import java.nio.channels.SeekableByteChannel; -import java.nio.file.Files; -import java.nio.file.StandardOpenOption; -import java.util.Arrays; -import java.util.List; -import org.apache.beam.sdk.extensions.gcp.options.GcsOptions; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.FlatMapElements; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.util.GcsUtil; -import org.apache.beam.sdk.util.gcsfs.GcsPath; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -/** - * To keep {@link MinimalWordCountJava8} simple, it is not factored or testable. This test - * file should be maintained with a copy of its code for a basic smoke test. 
- */ -@RunWith(JUnit4.class) -public class MinimalWordCountJava8Test implements Serializable { - - @Rule - public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false); - - /** - * A basic smoke test that ensures there is no crash at pipeline construction time. - */ - @Test - public void testMinimalWordCountJava8() throws Exception { - p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil()); - - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - .apply(FlatMapElements - .into(TypeDescriptors.strings()) - .via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))) - .apply(Filter.by((String word) -> !word.isEmpty())) - .apply(Count.perElement()) - .apply(MapElements - .into(TypeDescriptors.strings()) - .via((KV wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())) - .apply(TextIO.write().to("gs://your-output-bucket/and-output-prefix")); - } - - private GcsUtil buildMockGcsUtil() throws IOException { - GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class); - - // Any request to open gets a new bogus channel - Mockito - .when(mockGcsUtil.open(Mockito.any(GcsPath.class))) - .then(new Answer() { - @Override - public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable { - return FileChannel.open( - Files.createTempFile("channel-", ".tmp"), - StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE); - } - }); - - // Any request for expansion returns a list containing the original GcsPath - // This is required to pass validation that occurs in TextIO during apply() - Mockito - .when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) - .then(new Answer>() { - @Override - public List answer(InvocationOnMock invocation) throws Throwable { - return ImmutableList.of((GcsPath) invocation.getArguments()[0]); - } - }); - - return mockGcsUtil; - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java 
b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java deleted file mode 100644 index b4e4124e26..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import ${package}.WordCount.CountWords; -import ${package}.WordCount.ExtractWordsFn; -import ${package}.WordCount.FormatAsTextFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of WordCount. - */ -@RunWith(JUnit4.class) -public class WordCountTest { - - /** Example test that tests a specific {@link DoFn}. */ - @Test - public void testExtractWordsFn() throws Exception { - DoFnTester extractWordsFn = - DoFnTester.of(new ExtractWordsFn()); - - Assert.assertThat(extractWordsFn.processBundle(" some input words "), - CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), - CoreMatchers.hasItems()); - Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), - CoreMatchers.hasItems("some", "input", "words")); - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Example test that tests a PTransform by using an in-memory input and inspecting the output. 
*/ - @Test - @Category(ValidatesRunner.class) - public void testCountWords() throws Exception { - PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of())); - - PCollection output = input.apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())); - - PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java deleted file mode 100644 index 5cbdc6244f..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/GameStatsTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.GameStats.CalculateSpammyUsers; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of GameStats. - * Because the pipeline was designed for easy readability and explanations, it lacks good - * modularity for testing. See our testing documentation for better ideas: - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ - */ -@RunWith(JUnit4.class) -public class GameStatsTest implements Serializable { - - // User scores - static final List> USER_SCORES = Arrays.asList( - KV.of("Robot-2", 66), KV.of("Robot-1", 116), KV.of("user7_AndroidGreenKookaburra", 23), - KV.of("user7_AndroidGreenKookaburra", 1), - KV.of("user19_BisqueBilby", 14), KV.of("user13_ApricotQuokka", 15), - KV.of("user18_BananaEmu", 25), KV.of("user6_AmberEchidna", 8), - KV.of("user2_AmberQuokka", 6), KV.of("user0_MagentaKangaroo", 4), - KV.of("user0_MagentaKangaroo", 3), KV.of("user2_AmberCockatoo", 13), - KV.of("user7_AlmondWallaby", 15), KV.of("user6_AmberNumbat", 11), - KV.of("user6_AmberQuokka", 4)); - - // The expected list of 'spammers'. - static final List> SPAMMERS = Arrays.asList( - KV.of("Robot-2", 66), KV.of("Robot-1", 116)); - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the calculation of 'spammy users'. 
*/ - @Test - @Category(ValidatesRunner.class) - public void testCalculateSpammyUsers() throws Exception { - PCollection> input = p.apply(Create.of(USER_SCORES)); - PCollection> output = input.apply(new CalculateSpammyUsers()); - - // Check the set of spammers. - PAssert.that(output).containsInAnyOrder(SPAMMERS); - - p.run().waitUntilFinish(); - } - - @Test - public void testGameStatsOptions() { - PipelineOptionsFactory.as(GameStats.Options.class); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java deleted file mode 100644 index 17d459df93..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/HourlyTeamScoreTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.UserScore.GameActionInfo; -import ${package}.complete.game.UserScore.ParseEventFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Filter; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.joda.time.Instant; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of HourlyTeamScore. - * Because the pipeline was designed for easy readability and explanations, it lacks good - * modularity for testing. 
See our testing documentation for better ideas: - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ - */ -@RunWith(JUnit4.class) -public class HourlyTeamScoreTest implements Serializable { - - static final String[] GAME_EVENTS_ARRAY = new String[] { - "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444", - "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444", - "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444", - "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444", - // time gap... - "user0_AndroidGreenEchidna,AndroidGreenEchidna,0,1447965690000,2015-11-19 12:41:31.053", - "user0_MagentaKangaroo,MagentaKangaroo,4,1447965690000,2015-11-19 12:41:31.053", - "user2_AmberCockatoo,AmberCockatoo,13,1447965690000,2015-11-19 12:41:31.053", - "user18_BananaEmu,BananaEmu,7,1447965690000,2015-11-19 12:41:31.053", - "user3_BananaEmu,BananaEmu,17,1447965690000,2015-11-19 12:41:31.053", - "user18_BananaEmu,BananaEmu,1,1447965690000,2015-11-19 12:41:31.053", - "user18_ApricotCaneToad,ApricotCaneToad,14,1447965690000,2015-11-19 12:41:31.053" - }; - - - static final List GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY); - - - // Used to check the filtering. 
- static final KV[] FILTERED_EVENTS = new KV[] { - KV.of("user0_AndroidGreenEchidna", 0), KV.of("user0_MagentaKangaroo", 4), - KV.of("user2_AmberCockatoo", 13), - KV.of("user18_BananaEmu", 7), KV.of("user3_BananaEmu", 17), - KV.of("user18_BananaEmu", 1), KV.of("user18_ApricotCaneToad", 14) - }; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the filtering. */ - @Test - @Category(ValidatesRunner.class) - public void testUserScoresFilter() throws Exception { - - final Instant startMinTimestamp = new Instant(1447965680000L); - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply("ParseGameEvent", ParDo.of(new ParseEventFn())) - - .apply("FilterStartTime", Filter.by( - (GameActionInfo gInfo) - -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) - // run a map to access the fields in the result. - .apply(MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - PAssert.that(output).containsInAnyOrder(FILTERED_EVENTS); - - p.run().waitUntilFinish(); - } - - @Test - public void testUserScoreOptions() { - PipelineOptionsFactory.as(HourlyTeamScore.Options.class); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java deleted file mode 100644 index 6075c564b7..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/LeaderBoardTest.java +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ${package}.complete.game; - -import static org.hamcrest.Matchers.hasItem; -import static org.junit.Assert.assertThat; - -import com.google.common.collect.ImmutableMap; -import java.io.Serializable; -import ${package}.complete.game.LeaderBoard.CalculateTeamScores; -import ${package}.complete.game.LeaderBoard.CalculateUserScores; -import ${package}.complete.game.UserScore.GameActionInfo; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.TestStream; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TimestampedValue; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link LeaderBoard}. 
- */ -@RunWith(JUnit4.class) -public class LeaderBoardTest implements Serializable { - private static final Duration ALLOWED_LATENESS = Duration.standardHours(1); - private static final Duration TEAM_WINDOW_DURATION = Duration.standardMinutes(20); - private Instant baseTime = new Instant(0); - - @Rule - public TestPipeline p = TestPipeline.create(); - /** - * Some example users, on two separate teams. - */ - private enum TestUser { - RED_ONE("scarlet", "red"), RED_TWO("burgundy", "red"), - BLUE_ONE("navy", "blue"), BLUE_TWO("sky", "blue"); - - private final String userName; - private final String teamName; - - TestUser(String userName, String teamName) { - this.userName = userName; - this.teamName = teamName; - } - - public String getUser() { - return userName; - } - - public String getTeam() { - return teamName; - } - } - - /** - * A test of the {@link CalculateTeamScores} {@link PTransform} when all of the elements arrive - * on time (ahead of the watermark). - */ - @Test - public void testTeamScoresOnTime() { - - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - // Start at the epoch - .advanceWatermarkTo(baseTime) - // add some elements ahead of the watermark - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 3, Duration.standardSeconds(22)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(3))) - // The watermark advances slightly, but not past the end of the window - .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3))) - // Add some more on time elements - .addElements(event(TestUser.RED_ONE, 1, Duration.standardMinutes(4)), - event(TestUser.BLUE_ONE, 2, Duration.standardSeconds(270))) - // The window should close and emit an ON_TIME pane - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - 
String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - PAssert.that(teamScores) - .inOnTimePane(new IntervalWindow(baseTime, TEAM_WINDOW_DURATION)) - .containsInAnyOrder(KV.of(blueTeam, 12), KV.of(redTeam, 4)); - - p.run().waitUntilFinish(); - } - - /** - * A test of the {@link CalculateTeamScores} {@link PTransform} when all of the elements arrive - * on time, and the processing time advances far enough for speculative panes. - */ - @Test - public void testTeamScoresSpeculative() { - - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - // Start at the epoch - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1))) - // Some time passes within the runner, which causes a speculative pane containing the blue - // team's score to be emitted - .advanceProcessingTime(Duration.standardMinutes(10)) - .addElements(event(TestUser.RED_TWO, 5, Duration.standardMinutes(3))) - // Some additional time passes and we get a speculative pane for the red team - .advanceProcessingTime(Duration.standardMinutes(12)) - .addElements(event(TestUser.BLUE_TWO, 3, Duration.standardSeconds(22))) - // More time passes and a speculative pane containing a refined value for the blue pane is - // emitted - .advanceProcessingTime(Duration.standardMinutes(10)) - // Some more events occur - .addElements(event(TestUser.RED_ONE, 4, Duration.standardMinutes(4)), - event(TestUser.BLUE_TWO, 2, Duration.standardMinutes(2))) - // The window closes and we get an ON_TIME pane that contains all of the updates - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - IntervalWindow window = new IntervalWindow(baseTime, 
TEAM_WINDOW_DURATION); - // The window contains speculative panes alongside the on-time pane - PAssert.that(teamScores) - .inWindow(window) - .containsInAnyOrder(KV.of(blueTeam, 10) /* The on-time blue pane */, - KV.of(redTeam, 9) /* The on-time red pane */, - KV.of(blueTeam, 5) /* The first blue speculative pane */, - KV.of(blueTeam, 8) /* The second blue speculative pane */, - KV.of(redTeam, 5) /* The red speculative pane */); - PAssert.that(teamScores) - .inOnTimePane(window) - .containsInAnyOrder(KV.of(blueTeam, 10), KV.of(redTeam, 9)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive behind the watermark (late data), but before the end of the - * window. These elements are emitted on time. - */ - @Test - public void testTeamScoresUnobservablyLate() { - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)), - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5))) - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION).minus(Duration.standardMinutes(1))) - // These events are late, but the window hasn't closed yet, so the elements are in the - // on-time pane - .addElements(event(TestUser.RED_TWO, 2, Duration.ZERO), - event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)), - event(TestUser.BLUE_TWO, 2, Duration.standardSeconds(90)), - event(TestUser.RED_TWO, 3, Duration.standardMinutes(3))) - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1))) - .advanceWatermarkToInfinity(); - PCollection> teamScores = p.apply(createEvents) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = 
TestUser.RED_ONE.getTeam(); - // The On Time pane contains the late elements that arrived before the end of the window - PAssert.that(teamScores) - .inOnTimePane(window) - .containsInAnyOrder(KV.of(redTeam, 14), KV.of(blueTeam, 13)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive behind the watermark (late data) after the watermark passes the - * end of the window, but before the maximum allowed lateness. These elements are emitted in a - * late pane. - */ - @Test - public void testTeamScoresObservablyLate() { - - Instant firstWindowCloses = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION); - TestStream createEvents = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .advanceWatermarkTo(baseTime) - .addElements(event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)), - event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8))) - .advanceProcessingTime(Duration.standardMinutes(10)) - .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3))) - .addElements(event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)), - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5))) - .advanceWatermarkTo(firstWindowCloses.minus(Duration.standardMinutes(1))) - // These events are late but should still appear in a late pane - .addElements(event(TestUser.RED_TWO, 2, Duration.ZERO), - event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 3, Duration.standardMinutes(3))) - // A late refinement is emitted due to the advance in processing time, but the window has - // not yet closed because the watermark has not advanced - .advanceProcessingTime(Duration.standardMinutes(12)) - // These elements should appear in the final pane - .addElements(event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)), - event(TestUser.RED_TWO, 1, Duration.standardMinutes(3))) - .advanceWatermarkToInfinity(); - - PCollection> teamScores = p.apply(createEvents) - 
.apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - PAssert.that(teamScores) - .inWindow(window) - .satisfies((SerializableFunction>, Void>) input -> { - // The final sums need not exist in the same pane, but must appear in the output - // PCollection - assertThat(input, hasItem(KV.of(blueTeam, 11))); - assertThat(input, hasItem(KV.of(redTeam, 27))); - return null; - }); - PAssert.thatMap(teamScores) - // The closing behavior of CalculateTeamScores precludes an inFinalPane matcher - .inOnTimePane(window) - .isEqualTo(ImmutableMap.builder().put(redTeam, 7) - .put(blueTeam, 11) - .build()); - - // No final pane is emitted for the blue team, as all of their updates have been taken into - // account in earlier panes - PAssert.that(teamScores).inFinalPane(window).containsInAnyOrder(KV.of(redTeam, 27)); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive beyond the maximum allowed lateness. These elements are dropped - * within {@link CalculateTeamScores} and do not impact the final result. 
- */ - @Test - public void testTeamScoresDroppablyLate() { - - BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION); - TestStream infos = TestStream.create(AvroCoder.of(GameActionInfo.class)) - .addElements(event(TestUser.BLUE_ONE, 12, Duration.ZERO), - event(TestUser.RED_ONE, 3, Duration.ZERO)) - .advanceWatermarkTo(window.maxTimestamp()) - .addElements(event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_TWO, 3, Duration.ZERO), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(3))) - // Move the watermark to the end of the window to output on time - .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION)) - // Move the watermark past the end of the allowed lateness plus the end of the window - .advanceWatermarkTo(baseTime.plus(ALLOWED_LATENESS) - .plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1))) - // These elements within the expired window are droppably late, and will not appear in the - // output - .addElements( - event(TestUser.BLUE_TWO, 3, TEAM_WINDOW_DURATION.minus(Duration.standardSeconds(5))), - event(TestUser.RED_ONE, 7, Duration.standardMinutes(4))) - .advanceWatermarkToInfinity(); - PCollection> teamScores = p.apply(infos) - .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS)); - - String blueTeam = TestUser.BLUE_ONE.getTeam(); - String redTeam = TestUser.RED_ONE.getTeam(); - // Only one on-time pane and no late panes should be emitted - PAssert.that(teamScores) - .inWindow(window) - .containsInAnyOrder(KV.of(redTeam, 7), KV.of(blueTeam, 18)); - // No elements are added before the watermark passes the end of the window plus the allowed - // lateness, so no refinement should be emitted - PAssert.that(teamScores).inFinalPane(window).empty(); - - p.run().waitUntilFinish(); - } - - /** - * A test where elements arrive both on-time and late in {@link CalculateUserScores}, which emits - * output into the {@link GlobalWindow}. 
All elements that arrive should be taken into account, - * even if they arrive later than the maximum allowed lateness. - */ - @Test - public void testUserScore() { - - TestStream infos = - TestStream.create(AvroCoder.of(GameActionInfo.class)) - .addElements( - event(TestUser.BLUE_ONE, 12, Duration.ZERO), - event(TestUser.RED_ONE, 3, Duration.ZERO)) - .advanceProcessingTime(Duration.standardMinutes(7)) - .addElements( - event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)), - event(TestUser.BLUE_TWO, 3, Duration.ZERO), - event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(3))) - .advanceProcessingTime(Duration.standardMinutes(5)) - .advanceWatermarkTo(baseTime.plus(ALLOWED_LATENESS).plus(Duration.standardHours(12))) - // Late elements are always observable within the global window - they arrive before - // the window closes, so they will appear in a pane, even if they arrive after the - // allowed lateness, and are taken into account alongside on-time elements - .addElements( - event(TestUser.RED_ONE, 3, Duration.standardMinutes(7)), - event(TestUser.RED_ONE, 2, (ALLOWED_LATENESS).plus(Duration.standardHours(13)))) - .advanceProcessingTime(Duration.standardMinutes(6)) - .addElements(event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(12))) - .advanceProcessingTime(Duration.standardMinutes(20)) - .advanceWatermarkToInfinity(); - - PCollection> userScores = - p.apply(infos).apply(new CalculateUserScores(ALLOWED_LATENESS)); - - // User scores are emitted in speculative panes in the Global Window - this matcher choice - // ensures that panes emitted by the watermark advancing to positive infinity are not included, - // as that will not occur outside of tests - PAssert.that(userScores) - .inEarlyGlobalWindowPanes() - .containsInAnyOrder(KV.of(TestUser.BLUE_ONE.getUser(), 15), - KV.of(TestUser.RED_ONE.getUser(), 7), - KV.of(TestUser.RED_ONE.getUser(), 12), - KV.of(TestUser.BLUE_TWO.getUser(), 3), - KV.of(TestUser.BLUE_TWO.getUser(), 8)); - - 
p.run().waitUntilFinish(); - } - - @Test - public void testLeaderBoardOptions() { - PipelineOptionsFactory.as(LeaderBoard.Options.class); - } - - private TimestampedValue event( - TestUser user, - int score, - Duration baseTimeOffset) { - return TimestampedValue.of(new GameActionInfo(user.getUser(), - user.getTeam(), - score, - baseTime.plus(baseTimeOffset).getMillis()), baseTime.plus(baseTimeOffset)); - } -} diff --git a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java b/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java deleted file mode 100644 index 83b8821480..0000000000 --- a/maven-archetypes/examples-java8/src/main/resources/archetype-resources/src/test/java/complete/game/UserScoreTest.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.complete.game; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; -import ${package}.complete.game.UserScore.ExtractAndSumScore; -import ${package}.complete.game.UserScore.GameActionInfo; -import ${package}.complete.game.UserScore.ParseEventFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of UserScore. 
- */ -@RunWith(JUnit4.class) -public class UserScoreTest implements Serializable { - - static final String[] GAME_EVENTS_ARRAY = new String[] { - "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444", - "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444", - "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444", - "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444", - "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444", - "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444", - "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444", - "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444" - }; - - static final String[] GAME_EVENTS_ARRAY2 = new String[] { - "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444", - "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444", - "user13_BisqueBilby,BisqueBilby,xxx,1447955630000,2015-11-19 09:53:53.444" - }; - - static final List GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY); - static final List GAME_EVENTS2 = Arrays.asList(GAME_EVENTS_ARRAY2); - - static final List> USER_SUMS = Arrays.asList( - KV.of("user0_MagentaKangaroo", 3), KV.of("user13_ApricotQuokka", 15), - KV.of("user6_AmberNumbat", 11), KV.of("user7_AlmondWallaby", 15), - KV.of("user7_AndroidGreenKookaburra", 23), - KV.of("user19_BisqueBilby", 14)); - - static final List> TEAM_SUMS = Arrays.asList( - KV.of("MagentaKangaroo", 3), KV.of("ApricotQuokka", 15), - KV.of("AmberNumbat", 11), KV.of("AlmondWallaby", 15), - KV.of("AndroidGreenKookaburra", 23), - KV.of("BisqueBilby", 14)); - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Test the {@link ParseEventFn} {@link org.apache.beam.sdk.transforms.DoFn}. 
*/ - @Test - public void testParseEventFn() throws Exception { - DoFnTester parseEventFn = - DoFnTester.of(new ParseEventFn()); - - List results = parseEventFn.processBundle(GAME_EVENTS_ARRAY); - Assert.assertEquals(results.size(), 8); - Assert.assertEquals(results.get(0).getUser(), "user0_MagentaKangaroo"); - Assert.assertEquals(results.get(0).getTeam(), "MagentaKangaroo"); - Assert.assertEquals(results.get(0).getScore(), new Integer(3)); - } - - /** Tests ExtractAndSumScore("user"). */ - @Test - @Category(ValidatesRunner.class) - public void testUserScoreSums() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply(ParDo.of(new ParseEventFn())) - // Extract and sum username/score pairs from the event data. - .apply("ExtractUserScore", new ExtractAndSumScore("user")); - - // Check the user score sums. - PAssert.that(output).containsInAnyOrder(USER_SUMS); - - p.run().waitUntilFinish(); - } - - /** Tests ExtractAndSumScore("team"). */ - @Test - @Category(ValidatesRunner.class) - public void testTeamScoreSums() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of())); - - PCollection> output = input - .apply(ParDo.of(new ParseEventFn())) - // Extract and sum teamname/score pairs from the event data. - .apply("ExtractTeamScore", new ExtractAndSumScore("team")); - - // Check the team score sums. - PAssert.that(output).containsInAnyOrder(TEAM_SUMS); - - p.run().waitUntilFinish(); - } - - /** Test that bad input data is dropped appropriately. 
*/ - @Test - @Category(ValidatesRunner.class) - public void testUserScoresBadInput() throws Exception { - - PCollection input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of())); - - PCollection> extract = input - .apply(ParDo.of(new ParseEventFn())) - .apply( - MapElements - .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) - .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))); - - PAssert.that(extract).empty(); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index b0195b3f16..0000000000 --- a/maven-archetypes/examples-java8/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.8 diff --git a/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/examples-java8/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml deleted file mode 100644 index cfe47d4d8a..0000000000 --- a/maven-archetypes/examples/pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-examples - Google Cloud Dataflow SDK for Java - Examples Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a project containing all the example - pipelines. 
- - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - - - - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - - - - - - - - diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 2b9eb52d80..0000000000 --- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - 1.7 - - - - - - src/main/java - - **/*.java - - - - - src/test/java - - **/*.java - - - - diff --git a/maven-archetypes/examples/src/main/resources/NOTICE b/maven-archetypes/examples/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/examples/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 28ae0db9fe..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,241 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - jar - - - UTF-8 - 2.20 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - ${targetPlatform} - ${targetPlatform} - - - - - org.apache.maven.plugins - maven-surefire-plugin - ${surefire-plugin.version} - - all - 4 - true - - - - org.apache.maven.surefire - surefire-junit47 - ${surefire-plugin.version} - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.0.0 - - - package - - shade - - - ${project.artifactId}-bundled-${project.version} - - - *:* - - META-INF/LICENSE - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - com.google.api-client - google-api-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-bigquery - v2-rev295-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.http-client - google-http-client - 1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - com.google.apis - google-api-services-pubsub - v1-rev10-1.22.0 - - - - com.google.guava - guava-jdk5 - - - - - - joda-time - joda-time - 2.4 - - - - com.google.guava - guava - 20.0 - - - - - org.slf4j - slf4j-api - 1.7.14 - - - - org.slf4j - slf4j-jdk14 - 1.7.14 - - runtime - - - - - org.hamcrest - hamcrest-all - 1.3 - - - - junit 
- junit - 4.12 - - - diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java deleted file mode 100644 index 07870f2ed0..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * An example that verifies word counts in Shakespeare and includes Beam best practices. - * - *

This class, {@link DebuggingWordCount}, is the third in a series of four successively more - * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount} - * and {@link WordCount}. After you've looked at this example, then see the - * {@link WindowedWordCount} pipeline, for introduction of additional concepts. - * - *

Basic concepts, also in the MinimalWordCount and WordCount examples: - * Reading text files; counting a PCollection; executing a Pipeline both locally - * and using a selected runner; defining DoFns. - * - *

New Concepts: - *

- *   1. Logging using SLF4J, even in a distributed environment
- *   2. Creating a custom metric (runners have varying levels of support)
- *   3. Testing your Pipeline via PAssert
- * 
- * - *

To execute this pipeline locally, specify general pipeline configuration: - *

{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * 
- * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - */ -public class DebuggingWordCount { - /** A DoFn that filters for a specific key based upon a regular expression. */ - public static class FilterTextFn extends DoFn, KV> { - /** - * Concept #1: The logger below uses the fully qualified class name of FilterTextFn as the - * logger. Depending on your SLF4J configuration, log statements will likely be qualified by - * this name. - * - *

Note that this is entirely standard SLF4J usage. Some runners may provide a default SLF4J - * configuration that is most appropriate for their logging integration. - */ - private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class); - - private final Pattern filter; - public FilterTextFn(String pattern) { - filter = Pattern.compile(pattern); - } - - /** - * Concept #2: A custom metric can track values in your pipeline as it runs. Each - * runner provides varying levels of support for metrics, and may expose them - * in a dashboard, etc. - */ - private final Counter matchedWords = Metrics.counter(FilterTextFn.class, "matchedWords"); - private final Counter unmatchedWords = Metrics.counter(FilterTextFn.class, "unmatchedWords"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (filter.matcher(c.element().getKey()).matches()) { - // Log at the "DEBUG" level each element that we match. When executing this pipeline - // these log lines will appear only if the log level is set to "DEBUG" or lower. - LOG.debug("Matched: " + c.element().getKey()); - matchedWords.inc(); - c.output(c.element()); - } else { - // Log at the "TRACE" level each element that is not matched. Different log levels - // can be used to control the verbosity of logging providing an effective mechanism - // to filter less important information. - LOG.trace("Did not match: " + c.element().getKey()); - unmatchedWords.inc(); - } - } - } - - /** - * Options supported by {@link DebuggingWordCount}. - * - *

Inherits standard configuration options and all options defined in - * {@link WordCount.WordCountOptions}. - */ - public interface WordCountOptions extends WordCount.WordCountOptions { - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - PCollection> filteredWords = - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); - - /** - * Concept #3: PAssert is a set of convenient PTransforms in the style of - * Hamcrest's collection matchers that can be used when writing Pipeline level tests - * to validate the contents of PCollections. PAssert is best used in unit tests - * with small data sets but is demonstrated here as a teaching tool. - * - *

Below we verify that the set of filtered words matches our expected counts. Note - * that PAssert does not provide any output and that successful completion of the - * Pipeline implies that the expectations were met. Learn more at - * https://beam.apache.org/documentation/pipelines/test-your-pipeline/ on how to test - * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test. - */ - List> expectedResults = Arrays.asList( - KV.of("Flourish", 3L), - KV.of("stomach", 1L)); - PAssert.that(filteredWords).containsInAnyOrder(expectedResults); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java deleted file mode 100644 index d6b08066db..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; - - -/** - * An example that counts words in Shakespeare. - * - *

This class, {@link MinimalWordCount}, is the first in a series of four successively more - * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or - * argument processing, and focus on construction of the pipeline, which chains together the - * application of core transforms. - * - *

Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally the - * {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional - * concepts. - * - *

Concepts: - * - *

- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting items in a PCollection
- *   4. Writing data to text files
- * 
- * - *

No arguments are required to run this pipeline. It will be executed with the DirectRunner. You - * can see the results in the output files in your current working directory, with names like - * "wordcounts-00001-of-00005. When running on a distributed service, you would use an appropriate - * file service. - */ -public class MinimalWordCount { - - public static void main(String[] args) { - // Create a PipelineOptions object. This object lets us set various execution - // options for our pipeline, such as the runner you wish to use. This example - // will run with the DirectRunner by default, based on the class path configured - // in its dependencies. - PipelineOptions options = PipelineOptionsFactory.create(); - - // Create the Pipeline object with the options we defined above. - Pipeline p = Pipeline.create(options); - - // Apply the pipeline's transforms. - - // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set - // of input text files. TextIO.Read returns a PCollection where each element is one line from - // the input text (a set of Shakespeare's texts). - - // This example reads a public data set consisting of the complete works of Shakespeare. - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) - - // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a - // DoFn (defined in-line) on each element that tokenizes the text line into individual words. - // The ParDo returns a PCollection, where each element is an individual word in - // Shakespeare's collected texts. - .apply("ExtractWords", ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - for (String word : c.element().split(ExampleUtils.TOKENIZER_PATTERN)) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - })) - - // Concept #3: Apply the Count transform to our PCollection of individual words. 
The Count - // transform returns a new PCollection of key/value pairs, where each key represents a unique - // word in the text. The associated value is the occurrence count for that word. - .apply(Count.perElement()) - - // Apply a MapElements transform that formats our PCollection of word counts into a printable - // string, suitable for writing to an output file. - .apply("FormatResults", MapElements.via(new SimpleFunction, String>() { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - })) - - // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline. - // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of - // formatted strings) to a series of text files. - // - // By default, it will write to a set of files with names like wordcount-00001-of-00005 - .apply(TextIO.write().to("wordcounts")); - - // Run the pipeline. - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java deleted file mode 100644 index 6a1d07c485..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.io.IOException; -import java.util.concurrent.ThreadLocalRandom; -import ${package}.common.ExampleBigQueryTableOptions; -import ${package}.common.ExampleOptions; -import ${package}.common.WriteOneFilePerWindow; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.Duration; -import org.joda.time.Instant; - - -/** - * An example that counts words in text, and can run over either unbounded or bounded input - * collections. - * - *

This class, {@link WindowedWordCount}, is the last in a series of four successively more - * detailed 'word count' examples. First take a look at {@link MinimalWordCount}, - * {@link WordCount}, and {@link DebuggingWordCount}. - * - *

Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples: - * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally - * and using a selected runner; defining DoFns; - * user-defined PTransforms; defining PipelineOptions. - * - *

New Concepts: - *

- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. Windowing
- *   4. Re-using PTransforms over windowed PCollections
- *   5. Accessing the window of an element
- *   6. Writing data to per-window text files
- * 
- * - *

By default, the examples will run with the {@code DirectRunner}. - * To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * See examples/java/README.md for instructions about how to configure different runners. - * - *

To execute this pipeline locally, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - * - *

By default, the pipeline will do fixed windowing, on 1-minute windows. You can - * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10} - * for 10-minute windows. - * - *

The example will try to cancel the pipeline on the signal to terminate the process (CTRL-C). - */ -public class WindowedWordCount { - static final int WINDOW_SIZE = 10; // Default window duration in minutes - /** - * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for - * this example, for the bounded data case. - * - *

Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate - * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a - * 2-hour period. - */ - static class AddTimestampFn extends DoFn { - private static final Duration RAND_RANGE = Duration.standardHours(1); - private final Instant minTimestamp; - private final Instant maxTimestamp; - - AddTimestampFn(Instant minTimestamp, Instant maxTimestamp) { - this.minTimestamp = minTimestamp; - this.maxTimestamp = maxTimestamp; - } - - @ProcessElement - public void processElement(ProcessContext c) { - Instant randomTimestamp = - new Instant( - ThreadLocalRandom.current() - .nextLong(minTimestamp.getMillis(), maxTimestamp.getMillis())); - - /** - * Concept #2: Set the data element with that timestamp. - */ - c.outputWithTimestamp(c.element(), new Instant(randomTimestamp)); - } - } - - /** A {@link DefaultValueFactory} that returns the current system time. */ - public static class DefaultToCurrentSystemTime implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return System.currentTimeMillis(); - } - } - - /** A {@link DefaultValueFactory} that returns the minimum timestamp plus one hour. */ - public static class DefaultToMinTimestampPlusOneHour implements DefaultValueFactory { - @Override - public Long create(PipelineOptions options) { - return options.as(Options.class).getMinTimestampMillis() - + Duration.standardHours(1).getMillis(); - } - } - - /** - * Options for {@link WindowedWordCount}. - * - *

Inherits standard example configuration options, which allow specification of the - * runner, as well as the {@link WordCount.WordCountOptions} support for - * specification of the input and output files. - */ - public interface Options extends WordCount.WordCountOptions, - ExampleOptions, ExampleBigQueryTableOptions { - @Description("Fixed window duration, in minutes") - @Default.Integer(WINDOW_SIZE) - Integer getWindowSize(); - void setWindowSize(Integer value); - - @Description("Minimum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToCurrentSystemTime.class) - Long getMinTimestampMillis(); - void setMinTimestampMillis(Long value); - - @Description("Maximum randomly assigned timestamp, in milliseconds-since-epoch") - @Default.InstanceFactory(DefaultToMinTimestampPlusOneHour.class) - Long getMaxTimestampMillis(); - void setMaxTimestampMillis(Long value); - - @Description("Fixed number of shards to produce per window, or null for runner-chosen sharding") - Integer getNumShards(); - void setNumShards(Integer numShards); - } - - public static void main(String[] args) throws IOException { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - final String output = options.getOutput(); - final Instant minTimestamp = new Instant(options.getMinTimestampMillis()); - final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis()); - - Pipeline pipeline = Pipeline.create(options); - - /** - * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or - * unbounded input source. - */ - PCollection input = pipeline - /** Read from the GCS file. */ - .apply(TextIO.read().from(options.getInputFile())) - // Concept #2: Add an element timestamp, using an artificial time just to show windowing. - // See AddTimestampFn for more detail on this. - .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp))); - - /** - * Concept #3: Window into fixed windows. 
The fixed window size for this example defaults to 1 - * minute (you can change this with a command-line option). See the documentation for more - * information on how fixed windows work, and for information on the other types of windowing - * available (e.g., sliding windows). - */ - PCollection windowedWords = - input.apply( - Window.into( - FixedWindows.of(Duration.standardMinutes(options.getWindowSize())))); - - /** - * Concept #4: Re-use our existing CountWords transform that does not have knowledge of - * windows over a PCollection containing windowed values. - */ - PCollection> wordCounts = windowedWords.apply(new WordCount.CountWords()); - - /** - * Concept #5: Format the results and write to a sharded file partitioned by window, using a - * simple ParDo operation. Because there may be failures followed by retries, the - * writes must be idempotent, but the details of writing to files is elided here. - */ - wordCounts - .apply(MapElements.via(new WordCount.FormatAsTextFn())) - .apply(new WriteOneFilePerWindow(output, options.getNumShards())); - - PipelineResult result = pipeline.run(); - try { - result.waitUntilFinish(); - } catch (Exception exc) { - result.cancel(); - } - } - -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java deleted file mode 100644 index 79b71403b9..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import ${package}.common.ExampleUtils; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation.Required; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** - * An example that counts words in Shakespeare and includes Beam best practices. - * - *

This class, {@link WordCount}, is the second in a series of four successively more detailed - * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}. - * After you've looked at this example, then see the {@link DebuggingWordCount} - * pipeline, for introduction of additional concepts. - * - *

For a detailed walkthrough of this example, see - * - * https://beam.apache.org/get-started/wordcount-example/ - * - * - *

Basic concepts, also in the MinimalWordCount example: - * Reading text files; counting a PCollection; writing to text files - * - *

New Concepts: - *

- *   1. Executing a Pipeline both locally and using the selected runner
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * 
- * - *

Concept #1: you can execute this pipeline either locally or using by selecting another runner. - * These are now command-line options and not hard-coded as they were in the MinimalWordCount - * example. - * - *

To change the runner, specify: - *

{@code
- *   --runner=YOUR_SELECTED_RUNNER
- * }
- * 
- * - *

To execute this pipeline, specify a local output file (if using the - * {@code DirectRunner}) or output prefix on a supported distributed file system. - *

{@code
- *   --output=[YOUR_LOCAL_FILE | YOUR_OUTPUT_PREFIX]
- * }
- * - *

The input file defaults to a public data set containing the text of of King Lear, - * by William Shakespeare. You can override it and choose your own input with {@code --inputFile}. - */ -public class WordCount { - - /** - * Concept #2: You can make your pipeline assembly code less verbose by defining your DoFns - * statically out-of-line. This DoFn tokenizes lines of text into individual words; we pass it - * to a ParDo in the pipeline. - */ - static class ExtractWordsFn extends DoFn { - private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines"); - - @ProcessElement - public void processElement(ProcessContext c) { - if (c.element().trim().isEmpty()) { - emptyLines.inc(); - } - - // Split the line into words. - String[] words = c.element().split(ExampleUtils.TOKENIZER_PATTERN); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - /** A SimpleFunction that converts a Word and Count into a printable string. */ - public static class FormatAsTextFn extends SimpleFunction, String> { - @Override - public String apply(KV input) { - return input.getKey() + ": " + input.getValue(); - } - } - - /** - * A PTransform that converts a PCollection containing lines of text into a PCollection of - * formatted word counts. - * - *

Concept #3: This is a custom composite transform that bundles two transforms (ParDo and - * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse, - * modular testing, and an improved monitoring experience. - */ - public static class CountWords extends PTransform, - PCollection>> { - @Override - public PCollection> expand(PCollection lines) { - - // Convert lines of text into individual words. - PCollection words = lines.apply( - ParDo.of(new ExtractWordsFn())); - - // Count the number of times each word occurs. - PCollection> wordCounts = - words.apply(Count.perElement()); - - return wordCounts; - } - } - - /** - * Options supported by {@link WordCount}. - * - *

Concept #4: Defining your own configuration options. Here, you can add your own arguments - * to be processed by the command-line parser, and specify default values for them. You can then - * access the options values in your pipeline code. - * - *

Inherits standard configuration options. - */ - public interface WordCountOptions extends PipelineOptions { - - /** - * By default, this example reads from a public dataset containing the text of - * King Lear. Set this option to choose a different input file or glob. - */ - @Description("Path of the file to read from") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInputFile(); - void setInputFile(String value); - - /** - * Set this required option to specify where to write the output. - */ - @Description("Path of the file to write to") - @Required - String getOutput(); - void setOutput(String value); - } - - public static void main(String[] args) { - WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() - .as(WordCountOptions.class); - Pipeline p = Pipeline.create(options); - - // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the - // static FormatAsTextFn() to the ParDo transform. - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())) - .apply("WriteCounts", TextIO.write().to(options.getOutput())); - - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java deleted file mode 100644 index 57f1546e27..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.services.bigquery.model.TableSchema; -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure BigQuery tables in Beam examples. - * The project defaults to the project being used to run the example. - */ -public interface ExampleBigQueryTableOptions extends GcpOptions { - @Description("BigQuery dataset name") - @Default.String("beam_examples") - String getBigQueryDataset(); - void setBigQueryDataset(String dataset); - - @Description("BigQuery table name") - @Default.InstanceFactory(BigQueryTableFactory.class) - String getBigQueryTable(); - void setBigQueryTable(String table); - - @Description("BigQuery table schema") - TableSchema getBigQuerySchema(); - void setBigQuerySchema(TableSchema schema); - - /** - * Returns the job name as the default BigQuery table name. 
- */ - class BigQueryTableFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return options.getJobName().replace('-', '_'); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java deleted file mode 100644 index 90f935c3ce..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleOptions.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure the Beam examples. 
- */ -public interface ExampleOptions extends PipelineOptions { - @Description("Whether to keep jobs running after local process exit") - @Default.Boolean(false) - boolean getKeepJobsRunning(); - void setKeepJobsRunning(boolean keepJobsRunning); - - @Description("Number of workers to use when executing the injector pipeline") - @Default.Integer(1) - int getInjectorNumWorkers(); - void setInjectorNumWorkers(int numWorkers); -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java deleted file mode 100644 index cf142a10fd..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicAndSubscriptionOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic/subscription in Beam examples. - */ -public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions { - @Description("Pub/Sub subscription") - @Default.InstanceFactory(PubsubSubscriptionFactory.class) - String getPubsubSubscription(); - void setPubsubSubscription(String subscription); - - /** - * Returns a default Pub/Sub subscription based on the project and the job names. - */ - class PubsubSubscriptionFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/subscriptions/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java deleted file mode 100644 index 86784b06da..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * Options that can be used to configure Pub/Sub topic in Beam examples. - */ -public interface ExamplePubsubTopicOptions extends GcpOptions { - @Description("Pub/Sub topic") - @Default.InstanceFactory(PubsubTopicFactory.class) - String getPubsubTopic(); - void setPubsubTopic(String topic); - - /** - * Returns a default Pub/Sub topic based on the project and the job names. - */ - class PubsubTopicFactory implements DefaultValueFactory { - @Override - public String create(PipelineOptions options) { - return "projects/" + options.as(GcpOptions.class).getProject() - + "/topics/" + options.getJobName(); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java deleted file mode 100644 index 78f3849b40..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleUtils.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}.common; - -import com.google.api.client.googleapis.json.GoogleJsonResponseException; -import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.Bigquery.Datasets; -import com.google.api.services.bigquery.Bigquery.Tables; -import com.google.api.services.bigquery.model.Dataset; -import com.google.api.services.bigquery.model.DatasetReference; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableReference; -import com.google.api.services.bigquery.model.TableSchema; -import com.google.api.services.pubsub.Pubsub; -import com.google.api.services.pubsub.model.Subscription; -import com.google.api.services.pubsub.model.Topic; -import com.google.auth.Credentials; -import com.google.auth.http.HttpCredentialsAdapter; -import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.common.util.concurrent.Uninterruptibles; -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.PipelineResult; 
-import org.apache.beam.sdk.extensions.gcp.auth.NullCredentialInitializer; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions; -import org.apache.beam.sdk.io.gcp.pubsub.PubsubOptions; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.RetryHttpRequestInitializer; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.util.Transport; -import org.joda.time.Duration; - -/** - * The utility class that sets up and tears down external resources, - * and cancels the streaming pipelines once the program terminates. - * - *

It is used to run Beam examples. - */ -public class ExampleUtils { - - private static final int SC_NOT_FOUND = 404; - - /** - * \p{L} denotes the category of Unicode letters, - * so this pattern will match on everything that is not a letter. - * - *

It is used for tokenizing strings in the wordcount examples. - */ - public static final String TOKENIZER_PATTERN = "[^\\p{L}]+"; - - private final PipelineOptions options; - private Bigquery bigQueryClient = null; - private Pubsub pubsubClient = null; - private Set pipelinesToCancel = Sets.newHashSet(); - private List pendingMessages = Lists.newArrayList(); - - /** - * Do resources and runner options setup. - */ - public ExampleUtils(PipelineOptions options) { - this.options = options; - } - - /** - * Sets up external resources that are required by the example, - * such as Pub/Sub topics and BigQuery tables. - * - * @throws IOException if there is a problem setting up the resources - */ - public void setup() throws IOException { - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backOff = - FluentBackoff.DEFAULT - .withMaxRetries(3).withInitialBackoff(Duration.millis(200)).backoff(); - Throwable lastException = null; - try { - do { - try { - setupPubsub(); - setupBigQueryTable(); - return; - } catch (GoogleJsonResponseException e) { - lastException = e; - } - } while (BackOffUtils.next(sleeper, backOff)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // Ignore InterruptedException - } - throw new RuntimeException(lastException); - } - - /** - * Sets up the Google Cloud Pub/Sub topic. - * - *

If the topic doesn't exist, a new topic with the given name will be created. - * - * @throws IOException if there is a problem setting up the Pub/Sub topic - */ - public void setupPubsub() throws IOException { - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - pendingMessages.add("**********************Set Up Pubsub************************"); - setupPubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been set up for this example: " - + pubsubOptions.getPubsubTopic()); - - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - setupPubsubSubscription( - pubsubOptions.getPubsubTopic(), pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been set up for this example: " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - /** - * Sets up the BigQuery table with the given schema. - * - *

If the table already exists, the schema has to match the given one. Otherwise, the example - * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema - * will be created. - * - * @throws IOException if there is a problem setting up the BigQuery table - */ - public void setupBigQueryTable() throws IOException { - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("******************Set Up Big Query Table*******************"); - setupBigQueryTable(bigQueryTableOptions.getProject(), - bigQueryTableOptions.getBigQueryDataset(), - bigQueryTableOptions.getBigQueryTable(), - bigQueryTableOptions.getBigQuerySchema()); - pendingMessages.add("The BigQuery table has been set up for this example: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - } - } - - /** - * Tears down external resources that can be deleted upon the example's completion. 
- */ - private void tearDown() { - pendingMessages.add("*************************Tear Down*************************"); - ExamplePubsubTopicAndSubscriptionOptions pubsubOptions = - options.as(ExamplePubsubTopicAndSubscriptionOptions.class); - if (!pubsubOptions.getPubsubTopic().isEmpty()) { - try { - deletePubsubTopic(pubsubOptions.getPubsubTopic()); - pendingMessages.add("The Pub/Sub topic has been deleted: " - + pubsubOptions.getPubsubTopic()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub topic : " - + pubsubOptions.getPubsubTopic()); - } - if (!pubsubOptions.getPubsubSubscription().isEmpty()) { - try { - deletePubsubSubscription(pubsubOptions.getPubsubSubscription()); - pendingMessages.add("The Pub/Sub subscription has been deleted: " - + pubsubOptions.getPubsubSubscription()); - } catch (IOException e) { - pendingMessages.add("Failed to delete the Pub/Sub subscription : " - + pubsubOptions.getPubsubSubscription()); - } - } - } - - ExampleBigQueryTableOptions bigQueryTableOptions = - options.as(ExampleBigQueryTableOptions.class); - if (bigQueryTableOptions.getBigQueryDataset() != null - && bigQueryTableOptions.getBigQueryTable() != null - && bigQueryTableOptions.getBigQuerySchema() != null) { - pendingMessages.add("The BigQuery table might contain the example's output, " - + "and it is not deleted automatically: " - + bigQueryTableOptions.getProject() - + ":" + bigQueryTableOptions.getBigQueryDataset() - + "." + bigQueryTableOptions.getBigQueryTable()); - pendingMessages.add("Please go to the Developers Console to delete it manually." - + " Otherwise, you may be charged for its usage."); - } - } - - /** - * Returns a BigQuery client builder using the specified {@link BigQueryOptions}. 
- */ - private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { - return new Bigquery.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - /** - * Returns a Pubsub client builder using the specified {@link PubsubOptions}. - */ - private static Pubsub.Builder newPubsubClient(PubsubOptions options) { - return new Pubsub.Builder(Transport.getTransport(), Transport.getJsonFactory(), - chainHttpRequestInitializer( - options.getGcpCredential(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(ImmutableList.of(404)))) - .setRootUrl(options.getPubsubRootUrl()) - .setApplicationName(options.getAppName()) - .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); - } - - private static HttpRequestInitializer chainHttpRequestInitializer( - Credentials credential, HttpRequestInitializer httpRequestInitializer) { - if (credential == null) { - return new ChainingHttpRequestInitializer( - new NullCredentialInitializer(), httpRequestInitializer); - } else { - return new ChainingHttpRequestInitializer( - new HttpCredentialsAdapter(credential), - httpRequestInitializer); - } - } - - private void setupBigQueryTable(String projectId, String datasetId, String tableId, - TableSchema schema) throws IOException { - if (bigQueryClient == null) { - bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build(); - } - - Datasets datasetService = bigQueryClient.datasets(); - if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) { - Dataset newDataset = new Dataset().setDatasetReference( - new 
DatasetReference().setProjectId(projectId).setDatasetId(datasetId)); - datasetService.insert(projectId, newDataset).execute(); - } - - Tables tableService = bigQueryClient.tables(); - Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId)); - if (table == null) { - Table newTable = new Table().setSchema(schema).setTableReference( - new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId)); - tableService.insert(projectId, datasetId, newTable).execute(); - } else if (!table.getSchema().equals(schema)) { - throw new RuntimeException( - "Table exists and schemas do not match, expecting: " + schema.toPrettyString() - + ", actual: " + table.getSchema().toPrettyString()); - } - } - - private void setupPubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) { - pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute(); - } - } - - private void setupPubsubSubscription(String topic, String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) { - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); - pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub topic. 
- * - * @throws IOException if there is a problem deleting the Pub/Sub topic - */ - private void deletePubsubTopic(String topic) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) { - pubsubClient.projects().topics().delete(topic).execute(); - } - } - - /** - * Deletes the Google Cloud Pub/Sub subscription. - * - * @throws IOException if there is a problem deleting the Pub/Sub subscription - */ - private void deletePubsubSubscription(String subscription) throws IOException { - if (pubsubClient == null) { - pubsubClient = newPubsubClient(options.as(PubsubOptions.class)).build(); - } - if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) != null) { - pubsubClient.projects().subscriptions().delete(subscription).execute(); - } - } - - /** - * Waits for the pipeline to finish and cancels it before the program exists. - */ - public void waitToFinish(PipelineResult result) { - pipelinesToCancel.add(result); - if (!options.as(ExampleOptions.class).getKeepJobsRunning()) { - addShutdownHook(pipelinesToCancel); - } - try { - result.waitUntilFinish(); - } catch (UnsupportedOperationException e) { - // Do nothing if the given PipelineResult doesn't support waitUntilFinish(), - // such as EvaluationResults returned by DirectRunner. 
- tearDown(); - printPendingMessages(); - } catch (Exception e) { - throw new RuntimeException("Failed to wait the pipeline until finish: " + result); - } - } - - private void addShutdownHook(final Collection pipelineResults) { - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - tearDown(); - printPendingMessages(); - for (PipelineResult pipelineResult : pipelineResults) { - try { - pipelineResult.cancel(); - } catch (IOException e) { - System.out.println("Failed to cancel the job."); - System.out.println(e.getMessage()); - } - } - - for (PipelineResult pipelineResult : pipelineResults) { - boolean cancellationVerified = false; - for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) { - if (pipelineResult.getState().isTerminal()) { - cancellationVerified = true; - break; - } else { - System.out.println( - "The example pipeline is still running. Verifying the cancellation."); - } - Uninterruptibles.sleepUninterruptibly(10, TimeUnit.SECONDS); - } - if (!cancellationVerified) { - System.out.println("Failed to verify the cancellation for job: " + pipelineResult); - } - } - } - }); - } - - private void printPendingMessages() { - System.out.println(); - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - for (String message : pendingMessages) { - System.out.println(message); - } - System.out.println("***********************************************************"); - System.out.println("***********************************************************"); - } - - private static T executeNullIfNotFound( - AbstractGoogleClientRequest request) throws IOException { - try { - return request.execute(); - } catch (GoogleJsonResponseException e) { - if (e.getStatusCode() == SC_NOT_FOUND) { - return null; - } else { - throw e; - } - } - } -} diff --git 
a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java deleted file mode 100644 index 9796d647b5..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/WriteOneFilePerWindow.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package ${package}.common; - -import static com.google.common.base.MoreObjects.firstNonNull; - -import javax.annotation.Nullable; -import org.apache.beam.sdk.io.FileBasedSink; -import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; -import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; - -/** - * A {@link DoFn} that writes elements to files with names deterministically derived from the lower - * and upper bounds of their key (an {@link IntervalWindow}). - * - *

This is test utility code, not for end-users, so examples can be focused on their primary - * lessons. - */ -public class WriteOneFilePerWindow extends PTransform, PDone> { - private static final DateTimeFormatter FORMATTER = ISODateTimeFormat.hourMinute(); - private String filenamePrefix; - @Nullable - private Integer numShards; - - public WriteOneFilePerWindow(String filenamePrefix, Integer numShards) { - this.filenamePrefix = filenamePrefix; - this.numShards = numShards; - } - - @Override - public PDone expand(PCollection input) { - ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); - TextIO.Write write = - TextIO.write() - .to(new PerWindowFiles(resource)) - .withTempDirectory(resource.getCurrentDirectory()) - .withWindowedWrites(); - if (numShards != null) { - write = write.withNumShards(numShards); - } - return input.apply(write); - } - - /** - * A {@link FilenamePolicy} produces a base file name for a write based on metadata about the data - * being written. This always includes the shard number and the total number of shards. For - * windowed writes, it also includes the window and pane index (a sequence number assigned to each - * trigger firing). - */ - public static class PerWindowFiles extends FilenamePolicy { - - private final ResourceId baseFilename; - - public PerWindowFiles(ResourceId baseFilename) { - this.baseFilename = baseFilename; - } - - public String filenamePrefixForWindow(IntervalWindow window) { - String prefix = - baseFilename.isDirectory() ? 
"" : firstNonNull(baseFilename.getFilename(), ""); - return String.format("%s-%s-%s", - prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end())); - } - - @Override - public ResourceId windowedFilename(int shardNumber, - int numShards, - BoundedWindow window, - PaneInfo paneInfo, - OutputFileHints outputFileHints) { - IntervalWindow intervalWindow = (IntervalWindow) window; - String filename = - String.format( - "%s-%s-of-%s%s", - filenamePrefixForWindow(intervalWindow), - shardNumber, - numShards, - outputFileHints.getSuggestedFilenameSuffix()); - return baseFilename - .getCurrentDirectory() - .resolve(filename, StandardResolveOptions.RESOLVE_FILE); - } - - @Override - public ResourceId unwindowedFilename( - int shardNumber, int numShards, OutputFileHints outputFileHints) { - throw new UnsupportedOperationException("Unsupported."); - } - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java deleted file mode 100644 index 155242d996..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import com.google.common.io.Files; -import java.io.File; -import java.nio.charset.StandardCharsets; -import ${package}.DebuggingWordCount.WordCountOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DebuggingWordCount}. - */ -@RunWith(JUnit4.class) -public class DebuggingWordCountTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - @Test - public void testDebuggingWordCount() throws Exception { - File inputFile = tmpFolder.newFile(); - File outputFile = tmpFolder.newFile(); - Files.write( - "stomach secret Flourish message Flourish here Flourish", - inputFile, - StandardCharsets.UTF_8); - WordCountOptions options = - TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setInputFile(inputFile.getAbsolutePath()); - options.setOutput(outputFile.getAbsolutePath()); - DebuggingWordCount.main(TestPipeline.convertToArgs(options)); - } -} diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java deleted file mode 100644 index b4e4124e26..0000000000 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one 
- * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import java.util.Arrays; -import java.util.List; -import ${package}.WordCount.CountWords; -import ${package}.WordCount.ExtractWordsFn; -import ${package}.WordCount.FormatAsTextFn; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.ValidatesRunner; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnTester; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.hamcrest.CoreMatchers; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests of WordCount. - */ -@RunWith(JUnit4.class) -public class WordCountTest { - - /** Example test that tests a specific {@link DoFn}. 
*/ - @Test - public void testExtractWordsFn() throws Exception { - DoFnTester extractWordsFn = - DoFnTester.of(new ExtractWordsFn()); - - Assert.assertThat(extractWordsFn.processBundle(" some input words "), - CoreMatchers.hasItems("some", "input", "words")); - Assert.assertThat(extractWordsFn.processBundle(" "), - CoreMatchers.hasItems()); - Assert.assertThat(extractWordsFn.processBundle(" some ", " input", " words"), - CoreMatchers.hasItems("some", "input", "words")); - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - @Rule - public TestPipeline p = TestPipeline.create(); - - /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */ - @Test - @Category(ValidatesRunner.class) - public void testCountWords() throws Exception { - PCollection input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of())); - - PCollection output = input.apply(new CountWords()) - .apply(MapElements.via(new FormatAsTextFn())); - - PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); - p.run().waitUntilFinish(); - } -} diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index 8a76657024..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.7 diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml deleted file mode 100644 index 7e5eb44cc9..0000000000 --- a/maven-archetypes/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT - ../pom.xml - - - google-cloud-dataflow-java-archetypes-parent - pom - - Google Cloud Dataflow SDK for Java - Maven Archetypes - - - starter - examples - examples-java8 - - - - - - - src/main/resources - true - - archetype-resources/pom.xml - - - - - src/main/resources - false - - archetype-resources/pom.xml - - - - - - - - org.apache.maven.plugins - maven-resources-plugin - - - @ - - false - - - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - default-jar - none - - - default-test-jar - none - - - - - - diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml deleted file mode 100644 index 34bfd076bb..0000000000 --- a/maven-archetypes/starter/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-archetypes-parent - 2.2.0-SNAPSHOT - ../pom.xml - - - 
google-cloud-dataflow-java-archetypes-starter - Google Cloud Dataflow SDK for Java - Starter Archetype - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This archetype creates a simple starter pipeline to get started - using the Google Cloud Dataflow SDK for Java. - - maven-archetype - - - - - org.apache.maven.archetype - archetype-packaging - 2.4 - - - - - - - src/test/resources - true - - - - - - - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - - true - - - - - - - - - diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml deleted file mode 100644 index 4c22d5d68b..0000000000 --- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - 1.7 - - - - - - src/main/java - - **/*.java - - - - diff --git a/maven-archetypes/starter/src/main/resources/NOTICE b/maven-archetypes/starter/src/main/resources/NOTICE deleted file mode 100644 index 981fde5a9e..0000000000 --- a/maven-archetypes/starter/src/main/resources/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Google Cloud Dataflow SDK for Java -Copyright 2017, Google Inc. - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml deleted file mode 100644 index 75eaaade81..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml +++ /dev/null @@ -1,90 +0,0 @@ - - - - 4.0.0 - - ${groupId} - ${artifactId} - ${version} - - - UTF-8 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - ${targetPlatform} - ${targetPlatform} - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - org.slf4j - slf4j-api - 1.7.14 - - - org.slf4j - slf4j-jdk14 - 1.7.14 - - - diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java deleted file mode 100644 index d6afdecf11..0000000000 --- a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package ${package}; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties deleted file mode 100644 index 8a76657024..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -package=it.pkg -version=0.1 -groupId=archetype.it -artifactId=basic -targetPlatform=1.7 diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt deleted file mode 100644 index 0b5987362f..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt +++ /dev/null @@ -1 +0,0 @@ -verify diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml deleted file mode 100644 index fc0940bf2d..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml +++ /dev/null @@ -1,90 +0,0 @@ - - - - 4.0.0 - - archetype.it - basic - 0.1 - - - UTF-8 - - - - - ossrh.snapshots - Sonatype OSS Repository Hosting - https://oss.sonatype.org/content/repositories/snapshots/ - - false - - - true - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - 1.7 - 1.7 - - - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - @project.version@ - - - - - org.slf4j - slf4j-api - 1.7.14 - - - org.slf4j - slf4j-jdk14 - 1.7.14 - - - diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java deleted file mode 100644 index 4ae92e8ce6..0000000000 --- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package it.pkg; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SimpleFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A starter example for writing Beam programs. - * - *

The example takes two strings, converts them to their upper-case - * representation and logs them. - * - *

To run this starter example locally using DirectRunner, just - * execute it without any additional parameters from your favorite development - * environment. - * - *

To run this starter example using managed resource in Google Cloud - * Platform, you should specify the following command-line options: - * --project= - * --stagingLocation= - * --runner=DataflowRunner - */ -public class StarterPipeline { - private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class); - - public static void main(String[] args) { - Pipeline p = Pipeline.create( - PipelineOptionsFactory.fromArgs(args).withValidation().create()); - - p.apply(Create.of("Hello", "World")) - .apply(MapElements.via(new SimpleFunction() { - @Override - public String apply(String input) { - return input.toUpperCase(); - } - })) - .apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(ProcessContext c) { - LOG.info(c.element()); - } - })); - - p.run(); - } -} diff --git a/pom.xml b/pom.xml deleted file mode 100644 index f9a662c658..0000000000 --- a/pom.xml +++ /dev/null @@ -1,452 +0,0 @@ - - - - 4.0.0 - - - com.google - google - 5 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - Google Cloud Dataflow SDK for Java - Parent - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes the parent POM for other Dataflow SDK - artifacts. - http://cloud.google.com/dataflow - 2013 - - 2.2.0-SNAPSHOT - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - - - Google Inc. 
- http://www.google.com - - - - - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git - HEAD - - - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - - - - apache.staging - Apache Software Foundation Staging Repository - https://repository.apache.org/content/repositories/staging/ - - true - - - false - - - - - apache.snapshots - Apache Software Foundation Snapshot Repository - https://repository.apache.org/content/repositories/snapshots/ - - false - - - true - - - - - - 3.2 - - - - UTF-8 - ${maven.build.timestamp} - yyyy-MM-dd HH:mm - - 2.2.0-SNAPSHOT - - Google Cloud Dataflow SDK for Java - ${project.version}-20170517 - 6 - 1 - - 4.12 - - - pom - - sdk - examples - maven-archetypes - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - 1.4.1 - - - enforce-java - - enforce - - - - - - [1.8.0,) - - - - - - - - - org.apache.maven.plugins - maven-clean-plugin - 3.0.0 - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.2 - - 1.7 - 1.7 - - -Xlint:all - -Werror - -Xlint:-options - - -Xlint:-processing - - true - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - 2.17 - - - com.puppycrawl.tools - checkstyle - 6.19 - - - org.apache.beam - beam-sdks-java-build-tools - ${beam.version} - - - - beam/checkstyle.xml - sdk/suppressions.xml - true - true - false - true - - - - - test-compile - - check - - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - true - - - - default-jar - - jar - - - - default-test-jar - - test-jar - - - - - - - org.apache.maven.plugins - maven-source-plugin - 3.0.1 - - - attach-sources - compile - - jar - - - - attach-test-sources - test-compile - - test-jar - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.10.4 - - false - - - - javadoc - package - - jar - - - - - 
- - org.apache.maven.plugins - maven-resources-plugin - 3.0.2 - - - - org.apache.maven.plugins - maven-dependency-plugin - 3.0.0 - - - - analyze-only - - - true - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.20 - - - - org.apache.maven.plugins - maven-archetype-plugin - 2.4 - - - org.apache.maven.shared - maven-invoker - 2.2 - - - - - - default-integration-test - install - - integration-test - - - true - - - - - - - org.apache.maven.plugins - maven-release-plugin - 2.5.3 - - true - true - deploy - - - - - org.codehaus.mojo - exec-maven-plugin - 1.5.0 - - false - - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - org.apache.maven.plugins - maven-source-plugin - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - - org.apache.maven.plugins - maven-dependency-plugin - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - ${project.version} - - - - org.apache.beam - beam-sdks-java-core - ${beam.version} - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - ${beam.version} - - - - org.apache.beam - beam-runners-direct-java - ${beam.version} - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - ${beam.version} - - - - org.apache.beam - beam-examples-java - ${beam.version} - - - - org.apache.beam - beam-examples-java8 - ${beam.version} - - - - junit - junit - ${junit.version} - test - - - - diff --git a/sdk/pom.xml b/sdk/pom.xml deleted file mode 100644 index 33f2255f82..0000000000 --- a/sdk/pom.xml +++ /dev/null @@ -1,70 +0,0 @@ - - - - 4.0.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-parent - 2.2.0-SNAPSHOT - - - google-cloud-dataflow-java-sdk-all - Google Cloud Dataflow SDK for Java - All - Google Cloud Dataflow SDK for Java is a distribution of Apache - Beam designed to 
simplify usage of Apache Beam on Google Cloud Dataflow - service. This artifact includes entire Dataflow Java SDK. - - jar - - - - - src/main/resources - true - - - - - - - org.apache.beam - beam-sdks-java-core - - - - org.apache.beam - beam-sdks-java-io-google-cloud-platform - - - - org.apache.beam - beam-runners-direct-java - - - - org.apache.beam - beam-runners-google-cloud-dataflow-java - - - - junit - junit - test - - - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java deleted file mode 100644 index 7bbfbe3729..0000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/SdkDependencies.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.dataflow.sdk; - -import org.apache.beam.runners.dataflow.DataflowRunner; -import org.apache.beam.runners.direct.DirectRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; - -/** - * Mark the dependencies as used at compile time. 
- */ -class SdkDependencies { - private Pipeline p; - private BigQueryIO bigQueryIO; - private DirectRunner directRunner; - private DataflowRunner dataflowRunner; -} diff --git a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties b/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties deleted file mode 100644 index 33ee76287a..0000000000 --- a/sdk/src/main/resources/org/apache/beam/runners/dataflow/dataflow-distribution.properties +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -name=${dataflow.release_name} -version=${pom.version} -build.date=${timestamp} -legacy.environment.major.version=${dataflow.legacy_environment_major_version} -fnapi.environment.major.version=${dataflow.fnapi_environment_major_version} -container.version=${dataflow.container_version} diff --git a/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java b/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java deleted file mode 100644 index 5088a00cfc..0000000000 --- a/sdk/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerInfoOverrideTest.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2017 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.beam.runners.dataflow; - -import static org.junit.Assert.assertEquals; - -import java.io.InputStream; -import java.util.Properties; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link DataflowRunnerInfo} specifically validating that properties in - * this distrbution are correctly read. - */ -@RunWith(JUnit4.class) -public class DataflowRunnerInfoOverrideTest { - private static final String DATAFLOW_DISTRIBUTION_PROPERTIES_PATH = - "/org/apache/beam/runners/dataflow/dataflow-distribution.properties"; - - private static final String FNAPI_ENVIRONMENT_MAJOR_VERSION_KEY = - "fnapi.environment.major.version"; - private static final String LEGACY_ENVIRONMENT_MAJOR_VERSION_KEY = - "legacy.environment.major.version"; - private static final String CONTAINER_VERSION_KEY = "container.version"; - - - @Test - public void testDataflowDistributionOverride() throws Exception { - try (InputStream in = - DataflowRunnerInfo.class.getResourceAsStream(DATAFLOW_DISTRIBUTION_PROPERTIES_PATH)) { - Properties properties = new Properties(); - properties.load(in); - - assertEquals(properties.getProperty(FNAPI_ENVIRONMENT_MAJOR_VERSION_KEY), - DataflowRunnerInfo.getDataflowRunnerInfo().getFnApiEnvironmentMajorVersion()); - assertEquals(properties.getProperty(LEGACY_ENVIRONMENT_MAJOR_VERSION_KEY), - DataflowRunnerInfo.getDataflowRunnerInfo().getLegacyEnvironmentMajorVersion()); - assertEquals(properties.getProperty(CONTAINER_VERSION_KEY), - 
DataflowRunnerInfo.getDataflowRunnerInfo().getContainerVersion()); - } - } -} diff --git a/sdk/suppressions.xml b/sdk/suppressions.xml deleted file mode 100644 index 4d707ab291..0000000000 --- a/sdk/suppressions.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - -