You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2020/08/06 17:04:07 UTC

[GitHub] [beam] pabloem commented on a change in pull request #12427: [BEAM-2855] nexmark python suite implement queries 0, 1, 2 and 9

pabloem commented on a change in pull request #12427:
URL: https://github.com/apache/beam/pull/12427#discussion_r465923575



##########
File path: sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py
##########
@@ -29,17 +29,15 @@
 from __future__ import absolute_import
 
 import apache_beam as beam
-from apache_beam.testing.benchmarks.nexmark.models import nexmark_model
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import ParseEventFn
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import display
+from apache_beam.testing.benchmarks.nexmark.models import auction_price
+from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
 
 
-def load(raw_events, metadata=None):
+def load(events, metadata=None):

Review comment:
       Perhaps it's not that important, but it looks like these events are already parsed (hence the name `events` instead of `raw_events`. Should you rename the pcollection in query 1 and query 0?

##########
File path: sdks/python/apache_beam/testing/benchmarks/nexmark/nexmark_util.py
##########
@@ -103,6 +121,107 @@ def process(self, elem):
     yield event
 
 
+class ParseJsonEvnetFn(beam.DoFn):
+  """Parses the raw event info into a Python objects.
+
+  Each event line has the following format:
+
+    person: {id,name,email,credit_card,city, \
+                          state,timestamp,extra}
+    auction: {id,item_name, description,initial_bid, \
+                          reserve_price,timestamp,expires,seller,category,extra}
+    bid: {auction,bidder,price,timestamp,extra}
+
+  For example:
+
+    {"id":1000,"name":"Peter Jones","emailAddress":"nhd@xcat.com",\
+        "creditCard":"7241 7320 9143 4888","city":"Portland","state":"WY",\

Review comment:
       can you improve the formatting of these docs?

##########
File path: sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query2.py
##########
@@ -29,17 +29,15 @@
 from __future__ import absolute_import
 
 import apache_beam as beam
-from apache_beam.testing.benchmarks.nexmark.models import nexmark_model
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import ParseEventFn
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import display
+from apache_beam.testing.benchmarks.nexmark.models import auction_price
+from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
 
 
-def load(raw_events, metadata=None):
+def load(events, metadata=None):
   return (
-      raw_events
-      | 'ParseEventFn' >> beam.ParDo(ParseEventFn())
-      | 'FilterInAuctionsWithSelectedId' >> beam.Filter(
-          lambda event: (
-              isinstance(event, nexmark_model.Auction) and event.id == metadata.
-              get('auction_id')))
-      | 'DisplayQuery2' >> beam.Map(display))  # pylint: disable=expression-not-assigned
+      events
+      | nexmark_query_util.JustBids()

Review comment:
       The documentation for q2 says `Select auctions by auction id.` - but I see you're selecting Bids, and then filtering them by Auction. Should the documentation be updated or should you be selecting Auctions and filtering those?

##########
File path: sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py
##########
@@ -29,20 +29,16 @@
 
 import apache_beam as beam
 from apache_beam.testing.benchmarks.nexmark.models import nexmark_model
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import ParseEventFn
-from apache_beam.testing.benchmarks.nexmark.nexmark_util import display
+from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
 
 
 def load(raw_events, query_args=None):
   return (
       raw_events
-      | 'ParseEventFn' >> beam.ParDo(ParseEventFn())
-      | 'FilterInBids' >>
-      beam.Filter(lambda event: isinstance(event, nexmark_model.Bid))
+      | nexmark_query_util.JustBids()
       | 'ConvertToEuro' >> beam.Map(
           lambda bid: nexmark_model.Bid(
               bid.auction,
-              bid.bidder, (float(bid.price) * 89) // 100,
-              bid.timestamp,
-              bid.extra))
-      | 'DisplayQuery1' >> beam.Map(display))  # pylint: disable=expression-not-assigned
+              bid.bidder, (bid.price * 89) // 100,

Review comment:
       Perhaps create constants for 89 and 100? `EURO_RATE = 0.89` or something like that?

##########
File path: sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query9.py
##########
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+

Review comment:
       Please document query9 at the top




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org