28. Original Features
object SquawkLength extends FeatureType[Int]
object Super extends LabelType[Boolean]
val originalFeatures: Set[FeatureType] = Set(SquawkLength)
val label = Super
39. Data Preparation
val labelIndexer = new StringIndexer()
.setInputCol("label")
.setOutputCol("indexedLabel")
.fit(instances)
val featureIndexer = new VectorIndexer()
.setInputCol("features")
.setOutputCol("indexedFeatures")
.fit(instances)
val Array(trainingData, testingData) = instances.randomSplit(
Array(0.8, 0.2))
40. Learning a Model
val decisionTree = new DecisionTreeClassifier()
.setLabelCol("indexedLabel")
.setFeaturesCol("indexedFeatures")
val labelConverter = new IndexToString()
.setInputCol("prediction")
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels)
val pipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, decisionTree,
labelConverter))
41. Evolving Modeling Strategies
val randomForest = new RandomForestClassifier()
.setLabelCol("indexedLabel")
.setFeaturesCol("indexedFeatures")
val revisedPipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, randomForest,
labelConverter))
55. Model Metrics
val trainingSummary = model.summary
val binarySummary = trainingSummary
.asInstanceOf[BinaryLogisticRegressionSummary]
binarySummary.roc
roc.show()
binarySummary.areaUnderROC
56. Model Metrics
val predictions = model.transform(testingData)
val evaluator = new BinaryClassificationEvaluator()
.setLabelCol("label")
.setRawPredictionCol("rawPrediction")
.setMetricName("areaUnderPR")
val areaUnderPR = evaluator.evaluate(predictions)
57. Building Lineages
val rawData: RawData
val featureSet: Set[FeatureType]
val model: ClassificationModel
val modelMetrics: BinaryLogisticRegressionSummary