This project has retired. For details please refer to its Attic page.

This example demonstrates how to modify the E-Commerce Recommendation template to use "rate" events as training data.

However, recent "view" events are still used to make recommendations for new users (to recommend items similar to what the new user has just viewed), and the scores returned for a new user are ranking scores, not predicted ratings.

This template also supports the case where a user rates the same item multiple times; the latest rating value is used for training. The modification can be simplified further if this case does not need to be supported.

You can find the complete modified source code here.

Modification

DataSource.scala

In DataSource, change the ViewEvent case class to RateEvent and add a rating: Double field to it.

Change

1
// Event type used before the modification: (user, item, t), with no rating field.
case class ViewEvent(user: String, item: String, t: Long)

to

1
2
// MODIFIED
// Replaces ViewEvent: same user, item and t fields plus the rating value.
// readTraining() fills t from event.eventTime.getMillis.
case class RateEvent(user: String, item: String, rating: Double, t: Long)

Modify the TrainingData class to use rateEvents:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/** Training data produced by DataSource.readTraining().
  *
  * @param users      (String, User) pairs -- presumably keyed by user id
  * @param items      (String, Item) pairs -- presumably keyed by item id
  * @param rateEvents rate events used for training
  * @param buyEvents  buy events
  */
class TrainingData(
  val users: RDD[(String, User)],
  val items: RDD[(String, Item)],
  val rateEvents: RDD[RateEvent], // MODIFIED
  val buyEvents: RDD[BuyEvent]
) extends Serializable {
  /** Debug summary: for each RDD, its element count and its first two elements.
    *
    * Every field uses the same "name: [count (sample...)]" layout and the
    * fields are separated by a space so the output stays readable.
    * Note that this calls count() and take(2) on each of the four RDDs.
    */
  override def toString: String = {
    s"users: [${users.count()} (${users.take(2).toList}...)] " +
    s"items: [${items.count()} (${items.take(2).toList}...)] " +
    // MODIFIED
    s"rateEvents: [${rateEvents.count()} (${rateEvents.take(2).toList}...)] " +
    s"buyEvents: [${buyEvents.count()} (${buyEvents.take(2).toList}...)]"
  }
}

Modify readTraining() function of DataSource to read "rate" events (commented with "// MODIFIED"). Replace all ViewEvent with RateEvent. Replace all viewEventsRDD with rateEventsRDD. Retrieve the rating value from the event properties:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  /** Reads "rate" and "buy" events from the event store and builds TrainingData.
    *
    * The parts marked "..." are elided in this snippet (they build usersRDD,
    * itemsRDD and buyEventsRDD, which are referenced at the end).
    *
    * @param sc the SparkContext passed on to PEventStore.find
    * @return TrainingData holding users, items, rate events and buy events
    */
  override
  def readTraining(sc: SparkContext): TrainingData = {
    ...

    // Fetch all "rate" and "buy" events of user entities targeting items.
    // The result is cached because it is filtered below for the rate events.
    val eventsRDD: RDD[Event] = PEventStore.find(
      appName = dsp.appName,
      entityType = Some("user"),
      eventNames = Some(List("rate", "buy")), // MODIFIED
      // targetEntityType is an optional field of an event.
      targetEntityType = Some(Some("item")))(sc)
      .cache()

    // Keep only the "rate" events and convert each one to a RateEvent.
    val rateEventsRDD: RDD[RateEvent] = eventsRDD // MODIFIED
      .filter { event => event.event == "rate" } // MODIFIED
      .map { event =>
        try {
          RateEvent( // MODIFIED
            user = event.entityId,
            // .get throws if the event has no target entity; handled by the catch below
            item = event.targetEntityId.get,
            // the rating value is read from the event's "rating" property
            rating = event.properties.get[Double]("rating"), // ADDED
            t = event.eventTime.getMillis
          )
        } catch {
          // Log which event could not be converted, then rethrow so the failure is not hidden.
          case e: Exception =>
            logger.error(s"Cannot convert ${event} to RateEvent." + // MODIFIED
              s" Exception: ${e}.")
            throw e
        }
      }

    ...

    new TrainingData(
      users = usersRDD,
      items = itemsRDD,
      rateEvents = rateEventsRDD, // MODIFIED
      buyEvents = buyEventsRDD
    )
  }

Preparator.scala

Modify Preparator to pass rateEvents to algorithm as PreparedData (Replace all ViewEvent with RateEvent. Replace all viewEvents with rateEvents)

Modify Preparator's prepare() method:

1
2
3
4
5
6
7
8
9
10
  ...

  /** Wraps the four RDDs of the training data in a PreparedData, unchanged.
    *
    * @param sc           the SparkContext (not used by this method)
    * @param trainingData the data read by DataSource
    * @return PreparedData carrying the same users, items, rate events and buy events
    */
  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {
    val prepared = new PreparedData(
      users = trainingData.users,
      items = trainingData.items,
      rateEvents = trainingData.rateEvents, // MODIFIED
      buyEvents = trainingData.buyEvents
    )
    prepared
  }

Modify PreparedData class:

1
2
3
4
5
6
7
/** Data handed from Preparator.prepare() to the algorithm.
  *
  * Carries the same four RDDs as TrainingData.
  */
class PreparedData(
  val users: RDD[(String, User)],
  val items: RDD[(String, Item)],
  // rate events; the algorithm's train() requires this RDD to be non-empty
  val rateEvents: RDD[RateEvent], // MODIFIED
  val buyEvents: RDD[BuyEvent]
) extends Serializable

ECommAlgorithm.scala

Modify the train() and genMLlibRating() methods to train with rate events:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
  /** Trains the model from the prepared data.
    *
    * The rest of the body is elided ("...") in this snippet.
    *
    * @param sc   the SparkContext
    * @param data prepared data; its rateEvents must contain at least one event
    * @return the trained ECommModel
    * @throws IllegalArgumentException (from require) if rateEvents is empty
    */
  def train(sc: SparkContext, data: PreparedData): ECommModel = {
    // take(1) fetches at most one element, which is enough to tell whether
    // any rate event exists.
    require(data.rateEvents.take(1).nonEmpty, // MODIFIED
      "rateEvents in PreparedData cannot be empty." + // MODIFIED
      " Please check if DataSource generates TrainingData" +
      " and Preparator generates PreparedData correctly.")

    ...
  }

  /** Converts the rate events into MLlib ratings.
    *
    * Each rate event becomes ((userIndex, itemIndex), (rating, time)); events
    * whose user or item index is -1 are dropped, and when a (user, item) pair
    * occurs more than once only the entry with the latest time is kept.
    * The part of the map body marked "..." is elided in this snippet
    * (presumably it looks up uindex and iindex in the two BiMaps -- confirm
    * against the full source).
    *
    * @param userStringIntMap user String-to-Int index map
    * @param itemStringIntMap item String-to-Int index map
    * @param data             prepared data providing rateEvents
    * @return cached RDD with one MLlibRating per (user, item) pair
    */
  def genMLlibRating(
    userStringIntMap: BiMap[String, Int],
    itemStringIntMap: BiMap[String, Int],
    data: PreparedData): RDD[MLlibRating] = {

    val mllibRatings = data.rateEvents // MODIFIED
      .map { r =>
        ...

        // key by (user, item) and carry the event time so the latest rating can be chosen
        ((uindex, iindex), (r.rating, r.t)) // MODIFIED
      }
      .filter { case ((u, i), v) =>
        // keep events with valid user and item index
        (u != -1) && (i != -1)
      }
      .reduceByKey { case (v1, v2) => // MODIFIED
        // if a user may rate same item with different value at different times,
        // use the latest value for this case.
        // Can remove this reduceByKey() if no need to support this case.
        val (rating1, t1) = v1
        val (rating2, t2) = v2
        // keep the latest value; only the times are compared, and on equal times v2 is kept
        if (t1 > t2) v1 else v2
      }
      .map { case ((u, i), (rating, t)) => // MODIFIED
        // MLlibRating requires integer index for user and item
        MLlibRating(u, i, rating) // MODIFIED
      }
      .cache()

    mllibRatings
  }

Modify train() method to use ALS.train() instead of ALS.trainImplicit():

Change the following from:

1
2
3
4
5
6
7
8
9
10
11
12
    ...

    // Call used before the modification; note the alpha argument, which the
    // replacement call does not take.
    val m = ALS.trainImplicit(
      ratings = mllibRatings,
      rank = ap.rank,
      iterations = ap.numIterations,
      lambda = ap.lambda,
      blocks = -1,
      alpha = 1.0,
      seed = seed)
    ...

to:

1
2
3
4
5
6
7
8
9
10
11
    ...

    // ALS.train replaces ALS.trainImplicit: the alpha argument is dropped and
    // all other arguments are passed unchanged.
    val m = ALS.train( // MODIFIED
      ratings = mllibRatings,
      rank = ap.rank,
      iterations = ap.numIterations,
      lambda = ap.lambda,
      blocks = -1,
      seed = seed)
    ...

That's it! Now your engine can train a model with rate events.