Skip to content

Add Ring[BigDecimal], modeled after Ring[BigInt] #553

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import CMSFunctions.generateHashes
* We benchmark different `K` types as well as different input data streams.
*/
object CMSBenchmark {

import CMSHasherImplicits.CMSHasherBigInt

@State(Scope.Benchmark)
Expand All @@ -36,24 +35,34 @@ object CMSBenchmark {
var smallLongs: Vector[Long] = _
var smallBigInts: Vector[BigInt] = _
var largeBigInts: Vector[BigInt] = _
var smallBigDecimals: Vector[BigDecimal] = _
var largeBigDecimals: Vector[BigDecimal] = _
var largeStrings: Vector[String] = _

// need to initialize later because we don't have `eps` and `delta` yet.
var longMonoid: CMSMonoid[Long] = _
var bigIntMonoid: CMSMonoid[BigInt] = _
var bigDecimalMonoid: CMSMonoid[BigDecimal] = _
var stringMonoid: CMSMonoid[String] = _

/**
 * Initializes the CMS monoids and the input vectors held by this state object.
 *
 * Registered through `@Setup(Level.Trial)`. The monoids are created here rather than at field
 * declaration because they need `eps` and `delta`.
 */
@Setup(Level.Trial)
def setup(): Unit = {
// One monoid per key type, all built from the same eps / delta / Seed.
longMonoid = CMS.monoid[Long](eps, delta, Seed)
bigIntMonoid = CMS.monoid[BigInt](eps, delta, Seed)
bigDecimalMonoid = CMS.monoid[BigDecimal](eps, delta, Seed)
stringMonoid = CMS.monoid[String](eps, delta, Seed)

// "Large" inputs: `size` strings of MaxBits / bitsPerChar characters each; the BigInt and
// BigDecimal inputs below are derived from the bytes of those strings.
val bitsPerChar = 16
largeStrings = (1 to size).map(i => nextString(MaxBits / bitsPerChar)).toVector
// NOTE(review): largeBigInts is assigned twice in a row and the first assignment is immediately
// overwritten. This looks like the pre- and post-change lines of a diff both surviving; confirm
// and keep only one.
largeBigInts = largeStrings.map(s => BigInt(s.getBytes)).toVector
largeBigInts = largeStrings.map(s => BigInt(s.getBytes))
largeBigDecimals = largeStrings.map(s => {
// md is derived from the first character and lies in [-128, 127].
val md = (s.head % 256) - 128
// NOTE(review): BigDecimal(1).pow(md) is 1 for every md, so md has no effect on the value.
// Possibly BigDecimal(10).pow(md) was intended; confirm.
BigDecimal(BigInt(s.tail.getBytes)) * BigDecimal(1).pow(md)
})

// "Small" inputs: the integers 1..size converted to each key type.
smallLongs = (1 to size).map(_.toLong).toVector
smallBigInts = (1 to size).map(BigInt(_)).toVector
// NOTE(review): BigDecimal(1).pow(-size) is also always 1, so each element is simply i + 1.
// Confirm whether a base of 10 was intended.
smallBigDecimals = (1 to size).map(BigDecimal(_) + BigDecimal(1).pow(-size)).toVector
}

}
Expand All @@ -77,6 +86,14 @@ class CMSBenchmark {
def sumLargeBigIntCms(st: CMSState): CMS[BigInt] =
sumCmsVector(st.largeBigInts, st.bigIntMonoid)

/** Benchmark: sums the `smallBigDecimals` vector of `st` using its `bigDecimalMonoid`. */
@Benchmark
def sumSmallBigDecimalCms(st: CMSState): CMS[BigDecimal] = {
  val inputs = st.smallBigDecimals
  val monoid = st.bigDecimalMonoid
  sumCmsVector(inputs, monoid)
}

/** Benchmark: sums the `largeBigDecimals` vector of `st` using its `bigDecimalMonoid`. */
@Benchmark
def sumLargeBigDecimalCms(st: CMSState): CMS[BigDecimal] = {
  val inputs = st.largeBigDecimals
  val monoid = st.bigDecimalMonoid
  sumCmsVector(inputs, monoid)
}

/** Benchmark: sums the `largeStrings` vector of `st` using its `stringMonoid`. */
@Benchmark
def sumLargeStringCms(st: CMSState): CMS[String] = {
  val inputs = st.largeStrings
  val monoid = st.stringMonoid
  sumCmsVector(inputs, monoid)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,33 @@ object TopCMSBenchmark {
var smallLongs: Vector[Long] = _
var smallBigInts: Vector[BigInt] = _
var largeBigInts: Vector[BigInt] = _
var smallBigDecimals: Vector[BigDecimal] = _
var largeBigDecimals: Vector[BigDecimal] = _
var largeStrings: Vector[String] = _

var cmsLongMonoid: TopPctCMSMonoid[Long] = _
var cmsBigIntMonoid: TopPctCMSMonoid[BigInt] = _
var cmsBigDecimalMonoid: TopPctCMSMonoid[BigDecimal] = _
var cmsStringMonoid: TopPctCMSMonoid[String] = _

/**
 * Initializes the TopPctCMS monoids and the input vectors held by this state object.
 *
 * Registered through `@Setup(Level.Trial)`. Every monoid is built from the same
 * eps / delta / Seed / pct values.
 */
@Setup(Level.Trial)
def setup(): Unit = {
// One monoid per key type.
cmsLongMonoid = TopPctCMS.monoid[Long](eps, delta, Seed, pct)
cmsBigIntMonoid = TopPctCMS.monoid[BigInt](eps, delta, Seed, pct)
cmsBigDecimalMonoid = TopPctCMS.monoid[BigDecimal](eps, delta, Seed, pct)
cmsStringMonoid = TopPctCMS.monoid[String](eps, delta, Seed, pct)

// "Large" inputs: `size` strings of MaxBits / bitsPerChar characters each; the BigInt and
// BigDecimal inputs below are derived from the bytes of those strings.
val bitsPerChar = 16
largeStrings = (1 to size).map(i => nextString(MaxBits / bitsPerChar)).toVector
// NOTE(review): largeBigInts is assigned twice in a row and the first assignment is immediately
// overwritten. This looks like the pre- and post-change lines of a diff both surviving; confirm
// and keep only one.
largeBigInts = largeStrings.map(s => BigInt(s.getBytes)).toVector
largeBigInts = largeStrings.map(s => BigInt(s.getBytes))
largeBigDecimals = largeStrings.map(s => {
// md is derived from the first character and lies in [-128, 127].
val md = (s.head % 256) - 128
// NOTE(review): BigDecimal(1).pow(md) is 1 for every md, so md has no effect on the value.
// Possibly BigDecimal(10).pow(md) was intended; confirm.
BigDecimal(BigInt(s.tail.getBytes)) * BigDecimal(1).pow(md)
})

// "Small" inputs: the integers 1..size converted to each key type.
smallLongs = (1 to size).map(_.toLong).toVector
smallBigInts = (1 to size).map(BigInt(_)).toVector
// NOTE(review): BigDecimal(1).pow(-size) is also always 1, so each element is simply i + 1.
// Confirm whether a base of 10 was intended.
smallBigDecimals = (1 to size).map(BigDecimal(_) + BigDecimal(1).pow(-size)).toVector
}
}

Expand All @@ -75,6 +85,14 @@ class TopCMSBenchmark {
def sumLargeBigIntTopCms(st: CMSState) =
sumTopCmsVector(st.largeBigInts, st.cmsBigIntMonoid)

/** Benchmark: sums the `smallBigDecimals` vector of `st` using its `cmsBigDecimalMonoid`. */
@Benchmark
def sumSmallBigDecimalTopCms(st: CMSState) = {
  val inputs = st.smallBigDecimals
  val monoid = st.cmsBigDecimalMonoid
  sumTopCmsVector(inputs, monoid)
}

/** Benchmark: sums the `largeBigDecimals` vector of `st` using its `cmsBigDecimalMonoid`. */
@Benchmark
def sumLargeBigDecimalTopCms(st: CMSState) = {
  val inputs = st.largeBigDecimals
  val monoid = st.cmsBigDecimalMonoid
  sumTopCmsVector(inputs, monoid)
}

/** Benchmark: sums the `largeStrings` vector of `st` using its `cmsStringMonoid`. */
@Benchmark
def sumLargeStringTopCms(st: CMSState) = {
  val inputs = st.largeStrings
  val monoid = st.cmsStringMonoid
  sumTopCmsVector(inputs, monoid)
}
Expand Down
16 changes: 16 additions & 0 deletions algebird-core/src/main/scala/com/twitter/algebird/CMSHasher.scala
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,20 @@ object CMSHasher {
override def hash(a: Int, b: Int, width: Int)(x: Array[Byte]): Int = hashBytes(a, b, width)(x)
}

// Note: CMSHasher[BigInt] not provided here but in CMSHasherImplicits for legacy support reasons. New hashers
// should come here.

/**
 * [[CMSHasher]] for `BigDecimal` keys.
 *
 * The hash is computed from the canonical (trailing-zero-stripped) representation of the value,
 * so that numerically equal keys such as `BigDecimal("1.0")` and `BigDecimal("1.00")` -- which
 * compare equal as Scala `BigDecimal`s -- always land in the same bucket. Hashing the raw
 * `(unscaledValue, scale)` pair would spread the counts of one logical key over several buckets.
 */
implicit object CMSHasherBigDecimal extends CMSHasher[BigDecimal] {

  /**
   * @param a     seed passed to both MurmurHash3 invocations
   * @param b     not used by this implementation
   * @param width number of buckets; the result lies in `[0, width)`
   * @param x     the key to hash
   * @return the bucket index for `x`
   */
  override def hash(a: Int, b: Int, width: Int)(x: BigDecimal): Int = {
    // Canonicalise first. Zero is special-cased because `stripTrailingZeros` does not normalise
    // the scale of a zero value on some older JDKs.
    val canonical: java.math.BigDecimal =
      if (x.signum == 0) java.math.BigDecimal.ZERO
      else x.underlying.stripTrailingZeros

    val uh = scala.util.hashing.MurmurHash3.arrayHash(canonical.unscaledValue.toByteArray, a)
    val hash = scala.util.hashing.MurmurHash3.productHash((uh, canonical.scale), a)

    // We only want positive integers for the subsequent modulo. This method mimics Java's Hashtable
    // implementation. The Java code uses `0x7FFFFFFF` for the bit-wise AND, which is equal to Int.MaxValue.
    val positiveHash = hash & Int.MaxValue
    positiveHash % width
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ object Group extends GeneratedGroupImplicits with ProductGroups {
implicit val jshortGroup: Group[JShort] = JShortRing
implicit val longGroup: Group[Long] = LongRing
implicit val bigIntGroup: Group[BigInt] = BigIntRing
implicit val bigDecimalGroup: Group[BigDecimal] = BigDecimalRing
implicit val jlongGroup: Group[JLong] = JLongRing
implicit val floatGroup: Group[Float] = FloatField
implicit val jfloatGroup: Group[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ object Monoid extends GeneratedMonoidImplicits with ProductMonoids {
implicit val shortMonoid: Monoid[Short] = ShortRing
implicit val jshortMonoid: Monoid[JShort] = JShortRing
implicit val bigIntMonoid: Monoid[BigInt] = BigIntRing
implicit val bigDecimalMonoid: Monoid[BigDecimal] = BigDecimalRing
implicit val longMonoid: Monoid[Long] = LongRing
implicit val jlongMonoid: Monoid[JLong] = JLongRing
implicit val floatMonoid: Monoid[Float] = FloatField
Expand Down
2 changes: 2 additions & 0 deletions algebird-core/src/main/scala/com/twitter/algebird/Ring.scala
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ object LongRing extends Ring[Long] {
}

object BigIntRing extends NumericRing[BigInt]
object BigDecimalRing extends NumericRing[BigDecimal]

object Ring extends GeneratedRingImplicits with ProductRings {
// This pattern is really useful for typeclasses
Expand All @@ -139,6 +140,7 @@ object Ring extends GeneratedRingImplicits with ProductRings {
implicit val jshortRing: Ring[JShort] = JShortRing
implicit val longRing: Ring[Long] = LongRing
implicit val bigIntRing: Ring[BigInt] = BigIntRing
implicit val bigDecimalRing: Ring[BigDecimal] = BigDecimalRing
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why didn't line 134 above work? Or maybe it did here, but not for Semigroup, Monoid, Group? If so, I guess we should fix that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I didn't go that far. My initial motivation was that I would eventually no longer need to provide my own `Semigroup[BigDecimal]`; since `Ring[BigInt]` was already here and is used by `Semigroup[BigInt]`, `Monoid[BigInt]`, and `Group[BigInt]`, I just xerox'd the pattern.

implicit val jlongRing: Ring[JLong] = JLongRing
implicit val floatRing: Ring[Float] = FloatField
implicit val jfloatRing: Ring[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ object Semigroup extends GeneratedSemigroupImplicits with ProductSemigroups {
implicit val jshortSemigroup: Semigroup[JShort] = JShortRing
implicit val longSemigroup: Semigroup[Long] = LongRing
implicit val bigIntSemigroup: Semigroup[BigInt] = BigIntRing
implicit val bigDecimalSemigroup: Semigroup[BigDecimal] = BigDecimalRing
implicit val jlongSemigroup: Semigroup[JLong] = JLongRing
implicit val floatSemigroup: Semigroup[Float] = FloatField
implicit val jfloatSemigroup: Semigroup[JFloat] = JFloatField
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,24 @@ limitations under the License.

package com.twitter.algebird

import org.scalacheck.Arbitrary
import org.scalacheck.Prop
import org.scalacheck.{Arbitrary, Gen, Prop}
import org.scalacheck.Prop.forAll

import scala.math.Equiv

/**
* Base properties useful for all tests using Algebird's typeclasses.
*/

object BaseProperties {
/**
 * Arbitrary `BigDecimal`s of the form `base * 10^scale`, where `base` is an arbitrary `BigInt`
 * and `scale` is drawn from `[-128, 128]`.
 */
val arbReasonableBigDecimals: Arbitrary[BigDecimal] = Arbitrary {
  Gen.choose(-128, +128).flatMap { exponent =>
    implicitly[Arbitrary[BigInt]].arbitrary.map { unscaled =>
      BigDecimal(unscaled) * BigDecimal(10).pow(exponent)
    }
  }
}

def defaultEq[T](t0: T, t1: T) = t0 == t1

trait HigherEq[M[_]] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import Helpers.arbitraryBatched
class BatchedLaws extends CheckProperties with Matchers with PropertyChecks {

import BaseProperties._
implicit val arbitraryBigDecimalsHere = BaseProperties.arbReasonableBigDecimals

def testBatchedMonoid[A: Arbitrary: Monoid](name: String, size: Int): Unit = {
implicit val m: Monoid[Batched[A]] = Batched.compactingMonoid[A](size)
Expand All @@ -43,6 +44,10 @@ class BatchedLaws extends CheckProperties with Matchers with PropertyChecks {
testBatchedMonoid[BigInt]("BigInt", 10)
testBatchedMonoid[BigInt]("BigInt", 100)
testBatchedMonoid[BigInt]("BigInt", 1000000)
testBatchedMonoid[BigDecimal]("BigDecimal", 1)
testBatchedMonoid[BigDecimal]("BigDecimal", 10)
testBatchedMonoid[BigDecimal]("BigDecimal", 100)
testBatchedMonoid[BigDecimal]("BigDecimal", 1000000)
testBatchedMonoid[String]("String", 1)
testBatchedMonoid[String]("String", 10)
testBatchedMonoid[String]("String", 100)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class CmsLaws extends PropSpec with PropertyChecks with Matchers {
check(monoidLawsEquiv[CMS[BigInt]])
}

// Registers a property that runs monoidLawsEquiv for CMS[BigDecimal] through check(...).
property("CountMinSketch[BigDecimal] is a Monoid") {
// Both vals are implicit; presumably they are the instances monoidLawsEquiv resolves below.
implicit val cmsMonoid = CMS.monoid[BigDecimal](EPS, DELTA, SEED)
implicit val cmsGen = createArbitrary[BigDecimal](cmsMonoid)
check(monoidLawsEquiv[CMS[BigDecimal]])
}

property("CountMinSketch[String] is a Monoid") {
implicit val cmsMonoid = CMS.monoid[String](EPS, DELTA, SEED)
implicit val cmsGen = createArbitrary[String](cmsMonoid)
Expand Down Expand Up @@ -111,6 +117,12 @@ class TopPctCmsLaws extends PropSpec with PropertyChecks with Matchers {
monoidLaws[TopCMS[BigInt]]
}

// Registers a property built from monoidLaws for TopCMS[BigDecimal].
property("TopPctCms[BigDecimal] is a Monoid") {
// Both vals are implicit; presumably they are the instances monoidLaws resolves below.
implicit val cmsMonoid = TopPctCMS.monoid[BigDecimal](EPS, DELTA, SEED, HEAVY_HITTERS_PCT)
implicit val cmsGen = createArbitrary[BigDecimal](cmsMonoid)
// NOTE(review): unlike the CountMinSketch[BigDecimal] property, the result of monoidLaws is not
// passed to check(...). If monoidLaws only builds a Prop, it may never be evaluated here --
// confirm whether this should be wrapped in check(...).
monoidLaws[TopCMS[BigDecimal]]
}

property("TopPctCms[String] is a Monoid") {
implicit val cmsMonoid = TopPctCMS.monoid[String](EPS, DELTA, SEED, HEAVY_HITTERS_PCT)
implicit val cmsGen = createArbitrary[String](cmsMonoid)
Expand Down Expand Up @@ -273,6 +285,7 @@ class CMSShortTest extends CMSTest[Short]
class CMSIntTest extends CMSTest[Int]
class CMSLongTest extends CMSTest[Long]
class CMSBigIntTest extends CMSTest[BigInt]
class CMSBigDecimalTest extends CMSTest[BigDecimal]
class CMSStringTest extends CMSTest[String]
class CMSBytesTest extends CMSTest[Bytes]

Expand Down Expand Up @@ -964,6 +977,7 @@ class CMSHasherShortSpec extends CMSHasherSpec[Short]
class CMSHasherIntSpec extends CMSHasherSpec[Int]
class CMSHasherLongSpec extends CMSHasherSpec[Long]
class CMSHasherBigIntSpec extends CMSHasherSpec[BigInt]
class CMSHasherBigDecimalSpec extends CMSHasherSpec[BigDecimal]
class CMSHasherStringSpec extends CMSHasherSpec[String]
class CMSHasherBytesSpec extends CMSHasherSpec[Bytes]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ object FromIntLike {
override def fromInt(x: Int): BigInt = BigInt(x)
}

/** [[FromIntLike]] instance that lifts an `Int` into the `BigDecimal` of the same value. */
implicit object FromIntBigDecimal extends FromIntLike[BigDecimal] {
  override def fromInt(x: Int): BigDecimal = {
    val lifted: BigDecimal = BigDecimal.apply(x)
    lifted
  }
}

/** [[FromIntLike]] instance that renders an `Int` as its decimal string. */
implicit object FromIntString extends FromIntLike[String] {
  override def fromInt(x: Int): String = {
    val rendered: String = java.lang.Integer.toString(x)
    rendered
  }
}
Expand Down