Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

normalize field mode #701

Merged
merged 1 commit into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,13 @@ lazy val releaseSettings = Seq(
name = "Rafal Wojdyla",
email = "[email protected]",
url = url("https://twitter.com/ravwojdyla")
)
),
Developer(
id = "benk",
name = "Ben Konz",
email = "[email protected]",
url = url("https://benkonz.github.io/")
),
)
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.spotify.ratatool

object BigQueryUtil {

  /**
   * Resolves the effective mode of a BigQuery `TableFieldSchema`.
   *
   * A `null` mode can be treated as "NULLABLE", which is the default value
   * according to the docs, so callers comparing or dispatching on modes can
   * use this to normalize an unset mode first.
   *
   * @param fieldMode the raw mode string of the field, possibly `null`
   * @return "NULLABLE" when `fieldMode` is `null`, otherwise `fieldMode` unchanged
   */
  def getFieldModeWithDefault(fieldMode: String): String =
    // Option(...) maps a null reference to None, so the default applies only to null.
    Option(fieldMode).getOrElse("NULLABLE")
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,33 @@

package com.spotify.ratatool.diffy

import java.nio.ByteBuffer
import com.google.api.services.bigquery.model.{TableFieldSchema, TableRow, TableSchema}
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding
import com.google.protobuf.AbstractMessage
import com.spotify.ratatool.BigQueryUtil.getFieldModeWithDefault
import com.spotify.ratatool.Command
import com.spotify.ratatool.io.ParquetIO
import com.spotify.ratatool.samplers.{AvroSampler, ParquetSampler}
import com.spotify.ratatool.samplers.AvroSampler
import com.spotify.scio._
import com.spotify.scio.avro._
import com.spotify.scio.bigquery._
import com.spotify.scio.parquet.avro._
import com.spotify.scio.bigquery.client.BigQuery
import com.spotify.scio.bigquery.types.BigQueryType
import com.spotify.scio.coders.Coder
import com.spotify.scio.coders.kryo._
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.parquet.avro._
import com.spotify.scio.values.SCollection
import com.twitter.algebird._
import org.apache.avro.SchemaCompatibility.SchemaCompatibilityType
import org.apache.avro.{Schema, SchemaCompatibility}
import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecordBase
import org.apache.beam.sdk.io.TextIO
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding
import org.slf4j.{Logger, LoggerFactory}

import java.nio.ByteBuffer
import scala.annotation.tailrec
import scala.jdk.CollectionConverters._
import scala.collection.mutable
import scala.jdk.CollectionConverters._
import scala.language.higherKinds
import scala.reflect.ClassTag
import scala.util.{Failure, Success, Try}
Expand Down Expand Up @@ -555,7 +553,12 @@ object BigDiffy extends Command with Serializable {
case (Some(f), None) => f
case (None, Some(f)) => f
case (Some(fx), Some(fy)) =>
assert(fx.getType == fy.getType && fx.getMode == fy.getMode)
val fxMode = getFieldModeWithDefault(fx.getMode)
val fyMode = getFieldModeWithDefault(fy.getMode)
assert(
fx.getType == fy.getType && fxMode == fyMode,
f"field ${fx.getName} in lhs, type: ${fx.getType} mode: $fxMode, and rhs, type: ${fy.getType} mode: $fyMode, do not match"
)
if (fx.getType == "RECORD") {
fx.setFields(
mergeFields(fx.getFields.asScala.toList, fy.getFields.asScala.toList).asJava
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ package com.spotify.ratatool.scalacheck

import java.nio.ByteBuffer
import java.util

import com.google.api.services.bigquery.model.{TableFieldSchema, TableRow, TableSchema}
import com.spotify.ratatool.BigQueryUtil.getFieldModeWithDefault
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding
import org.joda.time._
import org.joda.time.format.DateTimeFormat
Expand Down Expand Up @@ -144,7 +144,7 @@ trait TableRowGeneratorOps {
case t => throw new RuntimeException(s"Unknown type: $t")
}

fieldSchema.getMode match {
getFieldModeWithDefault(fieldSchema.getMode) match {
case "REQUIRED" => genV()
case "NULLABLE" =>
Arbitrary.arbBool.arbitrary.flatMap { e =>
Expand Down