CatBoost UDF

Applying CatBoost models.

CatBoost::LoadModel(
  String -- path to file with the model
) -> Resource<CatBoost.TFormulaEvaluator> -- indicates a ready-to-use model

CatBoost::EvaluateBatch(
  Resource<CatBoost.TFormulaEvaluator>, -- result of CatBoost::LoadModel
  List<
    Struct<
      FloatFeatures:List<Float>, -- numerical features
      CatFeatures:List<String>, -- categorical features
      PassThrough:* -- arbitrary data passed through the function unchanged
    >
  >,
  Uint32?, -- TargetCount, number of targets in the model. Default is 1
  Uint32? -- BatchSize, default is 128
) -> List<
  Struct<
    Result:List<Float>, -- model application result
    PassThrough:* -- the value given in the PassThrough field of the input, unchanged
  >
> -- result

Usage example

Task: apply a CatBoost model, trained on data similar to the input table, to guess the value of the Target column from the values of the other columns.

Example data from the input table catboost_data:

Age

Cat1

Cat2

Cat3

Children

Education

Float1

Float2

Float3

Float4

Gender

Group

Income

MaritalStatus

Nationality

Occupation

String

Target

19

"0"

"n"

"1"

"Own-child"

"Some-college"

10

0

0

40

"Male"

"?"

208874

"Never-married"

"White"

"?"

"United-States"

"1"

27

"0"

"n"

"1"

"Not-in-family"

"Some-college"

10

0

0

40

"Female"

"Private"

158647

"Never-married"

"White"

"Adm-clerical"

"United-States"

"1"

18

"0"

"n"

"1"

"Unmarried"

"10th"

6

0

0

40

"Male"

"Private"

115258

"Never-married"

"White"

"Craft-repair"

"United-States"

"1"

49

"0"

"n"

"1"

"Unmarried"

"Assoc-voc"

11

0

1380

42

"Male"

"Private"

141944

"Married-spouse-absent"

"White"

"Handlers-cleaners"

"United-States"

"1"

37

"0"

"n"

"1"

"Husband"

"5th-6th"

3

0

0

40

"Male"

"Private"

227128

"Married-civ-spouse"

"White"

"Craft-repair"

"United-States"

"1"

28

"0"

"n"

"1"

"Unmarried"

"HS-grad"

9

0

0

55

"Male"

"Private"

22422

"Never-married"

"White"

"Transport-moving"

"United-States"

"1"

28

"0"

"n"

"1"

"Husband"

"HS-grad"

9

0

0

45

"Male"

"Private"

190367

"Married-civ-spouse"

"White"

"Machine-op-inspct"

"United-States"

"1"

20

"0"

"n"

"1"

"Own-child"

"Some-college"

10

0

0

36

"Male"

"?"

287681

"Never-married"

"White"

"?"

"United-States"

"1"

43

"0"

"n"

"1"

"Unmarried"

"Some-college"

10

0

0

38

"Female"

"Private"

196158

"Divorced"

"White"

"Adm-clerical"

"United-States"

"1"

30

"0"

"n"

"1"

"Own-child"

"HS-grad"

9

0

0

40

"Male"

"Private"

112650

"Divorced"

"White"

"Craft-repair"

"United-States"

"1"

-- Attach the model file to the query under the name "model.bin";
-- put the URL to fetch the model from in place of <URL>.
PRAGMA file("model.bin", "<URL>");

-- Create a CatBoost FormulaEvaluator from the attached model file.
$model = CatBoost::LoadModel(FilePath("model.bin"));

-- Shape each input row into the struct that CatBoost::EvaluateBatch accepts.
$features = (
    SELECT
        -- Numerical features, collected into one list
        AsList(
            Age,
            Income,
            Float1,
            Float2,
            Float3,
            Float4
        ) AS FloatFeatures,
        -- Categorical features, collected into one list
        AsList(
            Cat1,
            Cat2,
            Cat3,
            `Group`,
            Education,
            MaritalStatus,
            Occupation,
            Children,
            Nationality,
            Gender,
            `String`
        ) AS CatFeatures,
        -- Any value can travel through batch evaluation untouched;
        -- here the Target column is carried along
        Target AS PassThrough
    FROM catboost_data
);

-- Evaluate the model in batch mode over the prepared rows.
$predictions = (
    PROCESS $features
    USING CatBoost::EvaluateBatch($model, TableRows())
);

SELECT
    -- The model has a single target, so Result holds exactly one item
    Result[0] > 0.5 AS Guess,
    -- PassThrough holds the Target column value that was passed in
    PassThrough == "1" AS Answer,
    Result[0] - 0.5 AS Score
FROM $predictions;
Previous