Program Listing for File preprocessor.hpp


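/**
 * @file preprocessor.hpp
 *
 * Definition of the PreProcessor class, which provides dataset-specific
 * preprocessing routines (MNIST, Pascal VOC, CIFAR-10, channel-first image
 * conversion and YOLO target encoding) used by the data loaders.
 */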
#ifndef MODELS_DATALOADER_PREPROCESSOR_HPP
#define MODELS_DATALOADER_PREPROCESSOR_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace models {

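/**
 * PreProcessor exposes static helpers that transform datasets into the
 * layout expected by the models in this repository.
 *
 * @tparam DatasetX Datatype used for feature matrices (e.g. arma::mat).
 * @tparam DatasetY Datatype used for labels / annotations.
 */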
template<
  typename DatasetX = arma::mat,
  typename DatasetY = arma::mat
>
class PreProcessor
{
 public:
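  /**
   * Preprocess the MNIST dataset by shifting the training and validation
   * labels from the range [0, 9] to [1, 10].
   */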
  static void MNIST(DatasetX& /* trainX */,
                    DatasetY& trainY,
                    DatasetX& /* validX */,
                    DatasetY& validY,
                    DatasetX& /* testX */)
  {
    trainY = trainY + 1;
    validY = validY + 1;
  }

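  /**
   * Preprocess the Pascal VOC dataset. No transformation is needed; the
   * function exists only to keep the data-loader interface uniform.
   */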
  static void PascalVOC(DatasetX& /* trainX */,
                        DatasetY& /* trainY */,
                        DatasetX& /* validX */,
                        DatasetY& /* validY */,
                        DatasetX& /* testX */)
  {
    // Nothing to do here. Added to match the rest of the codebase.
  }

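  /**
   * Preprocess the CIFAR-10 dataset. No transformation is needed; the
   * function exists only to keep the data-loader interface uniform.
   */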
  static void CIFAR10(DatasetX& /* trainX */,
                      DatasetY& /* trainY */,
                      DatasetX& /* validX */,
                      DatasetY& /* validY */,
                      DatasetX& /* testX */)
  {
    // Nothing to do here. Added to match the rest of the codebase.
  }

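  /**
   * Rearrange each flattened image in trainFeatures into channel-first
   * layout and optionally scale the pixel values to [0, 1].
   *
   * A minimal usage sketch, assuming trainX holds one flattened 32 x 32 RGB
   * image per column:
   * @code
   * PreProcessor<>::ChannelFirstImages(trainX, 32, 32, 3);
   * @endcode
   *
   * @param trainFeatures Matrix holding one flattened image per column.
   * @param imageWidth Width of each image.
   * @param imageHeight Height of each image.
   * @param imageDepth Number of channels in each image.
   * @param normalize Whether to scale the pixel values to [0, 1].
   */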
  static void ChannelFirstImages(DatasetX& trainFeatures,
                                 const size_t imageWidth,
                                 const size_t imageHeight,
                                 const size_t imageDepth,
                                 bool normalize = true)
  {
    for (size_t idx = 0; idx < trainFeatures.n_cols; idx++)
    {
      // Create a copy of the current image so that the original column isn't
      // modified while its elements are rearranged.
      arma::cube inputTemp(trainFeatures.colptr(idx), imageDepth, imageWidth,
          imageHeight);

      size_t currentOffset = 0;
      for (size_t i = 0; i < inputTemp.n_slices; i++)
      {
        trainFeatures.col(idx)(arma::span(currentOffset, currentOffset +
            inputTemp.slice(i).n_elem - 1), arma::span()) =
            arma::vectorise(inputTemp.slice(i).t());
        currentOffset += inputTemp.slice(i).n_elem;
      }
    }

    if (normalize)
    {
      // Truncate each element to an 8-bit value, then scale it to [0, 1].
      for (size_t i = 0; i < trainFeatures.n_elem; i++)
        trainFeatures(i) = ((uint8_t) trainFeatures(i)) / 255.0;
    }
  }

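  /**
   * Encode bounding-box annotations as YOLO training targets laid out on a
   * grid of gridWidth x gridHeight cells.
   *
   * Each annotation column is expected to hold groups of five values per
   * object: the class label followed by the box corners (x1, y1, x2, y2) in
   * image coordinates. A minimal usage sketch with a single image and a
   * single object (values are illustrative only):
   * @code
   * arma::field<arma::vec> annotations(1, 1);
   * annotations(0, 0) = arma::vec({11, 100, 50, 200, 150});
   * arma::mat targets;
   * PreProcessor<arma::mat, arma::field<arma::vec>>::YOLOPreProcessor(
   *     annotations, targets);
   * @endcode
   *
   * @param annotations Field of annotation vectors, one column per image;
   *     must be of type arma::field<arma::vec>.
   * @param output Matrix in which the encoded targets are stored, one column
   *     per image.
   * @param version YOLO version (1 to 3) determining the target layout.
   * @param imageWidth Width of the input images.
   * @param imageHeight Height of the input images.
   * @param gridWidth Number of grid cells along the image width.
   * @param gridHeight Number of grid cells along the image height.
   * @param numBoxes Number of bounding boxes per grid cell.
   * @param numClasses Number of object classes.
   * @param normalize Whether to store box centres relative to their grid
   *     cell rather than to the whole image.
   */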
  template<typename eT>
  static void YOLOPreProcessor(const DatasetY& annotations,
                               arma::Mat<eT>& output,
                               const size_t version = 1,
                               const size_t imageWidth = 224,
                               const size_t imageHeight = 224,
                               const size_t gridWidth = 7,
                               const size_t gridHeight = 7,
                               const size_t numBoxes = 2,
                               const size_t numClasses = 20,
                               const bool normalize = true)
  {
    // TODO: see whether support can be extended to YOLOv4 / YOLOv5.
    mlpack::Log::Assert(version >= 1 && version <= 3,
        "Supported YOLO versions are version 1 to version 3.");

    mlpack::Log::Assert(typeid(annotations) == typeid(arma::field<arma::vec>),
        "Use Field type to represent annotations.");

    size_t batchSize = annotations.n_cols;
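    // For version 1, each grid cell stores five values (x, y, w, h,
    // confidence) per box plus a single set of class scores.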
    size_t numPredictions = 5 * numBoxes + numClasses;
    if (version > 1)
    {
      // Each bounding box has a corresponding class.
      numPredictions = numBoxes * (5 + numClasses);
    }

    double cellSizeHeight = 1.0 / gridHeight;
    double cellSizeWidth = 1.0 / gridWidth;

    // Set the size of the output; cube aliases over each column are used
    // below for convenience.
    output.set_size(gridWidth * gridHeight * numPredictions, batchSize);
    output.zeros();

    // Use offset to create a cube alias for a particular column (image) of
    // the batch.
    size_t offset = 0;
    for (size_t boxIdx = 0; boxIdx < batchSize; boxIdx++)
    {
      arma::cube outputTemp(output.memptr() + offset, gridHeight, gridWidth,
          numPredictions, false, false);
      offset += gridWidth * gridHeight * numPredictions;

      // Get the bounding boxes and labels corresponding to the current image.
      arma::mat labels(1, annotations(0, boxIdx).n_elem / 5);
      arma::mat boundingBoxes(4, annotations(0, boxIdx).n_elem / 5);
      for (size_t i = 0; i < boundingBoxes.n_cols; i++)
      {
        labels.col(i)(0) = annotations(0, boxIdx)(i * 5);
        boundingBoxes.col(i) = annotations(0, boxIdx)(arma::span(i * 5 + 1,
            (i + 1) * 5 - 1));
      }

      // For YOLOv2 or higher, each bounding box can represent its own class,
      // so labels are not repeated as they are for YOLOv1. The map stores the
      // offset of the last bounding box inserted into each grid cell.
      std::map<std::pair<size_t, size_t>, size_t> boundingBoxOffset;

      // Normalize the coordinates.
      boundingBoxes.row(0) /= imageWidth;
      boundingBoxes.row(2) /= imageWidth;
      boundingBoxes.row(1) /= imageHeight;
      boundingBoxes.row(3) /= imageHeight;

      // Get the width and height as well as the centres of the bounding
      // boxes.
      arma::mat widthAndHeight(2, boundingBoxes.n_cols);
      widthAndHeight.row(0) = (boundingBoxes.row(2) - boundingBoxes.row(0));
      widthAndHeight.row(1) = (boundingBoxes.row(3) - boundingBoxes.row(1));

      arma::mat centres(2, boundingBoxes.n_cols);
      centres.row(0) = (boundingBoxes.row(2) + boundingBoxes.row(0)) / 2.0;
      centres.row(1) = (boundingBoxes.row(3) + boundingBoxes.row(1)) / 2.0;

      // Assign bounding boxes to the grid.
      for (size_t i = 0; i < boundingBoxes.n_cols; i++)
      {
        // Grid cell index for this bounding box.
        arma::vec gridCoordinates = centres.col(i);
        arma::vec centreCoordinates = centres.col(i);

        if (normalize)
        {
          gridCoordinates(0) = std::ceil(gridCoordinates(0) /
              cellSizeWidth) - 1;
          gridCoordinates(1) = std::ceil(gridCoordinates(1) /
              cellSizeHeight) - 1;
        }
        else
        {
          gridCoordinates(0) = std::ceil((gridCoordinates(0) /
              imageWidth) / cellSizeWidth) - 1;
          gridCoordinates(1) = std::ceil((gridCoordinates(1) /
              imageHeight) / cellSizeHeight) - 1;
        }

        size_t gridX = gridCoordinates(0);
        size_t gridY = gridCoordinates(1);
        gridCoordinates(0) = gridCoordinates(0) * cellSizeWidth;
        gridCoordinates(1) = gridCoordinates(1) * cellSizeHeight;

        // Express the centre as an offset within its grid cell, scaled to
        // [0, 1].
        gridCoordinates = centres.col(i) - gridCoordinates;
        gridCoordinates(0) /= cellSizeWidth;
        gridCoordinates(1) /= cellSizeHeight;

        if (normalize)
          centreCoordinates = gridCoordinates;

        if (version == 1)
        {
          // Fill elements in the grid.
          for (size_t k = 0; k < numBoxes; k++)
          {
            size_t s = 5 * k;
            outputTemp(arma::span(gridX), arma::span(gridY),
                arma::span(s, s + 1)) = centreCoordinates;
            outputTemp(arma::span(gridX), arma::span(gridY),
                arma::span(s + 2, s + 3)) = widthAndHeight.col(i);
            outputTemp(gridX, gridY, s + 4) = 1.0;
          }
          outputTemp(gridX, gridY, 5 * numBoxes + labels.col(i)(0)) = 1;
        }
        else
        {
          size_t s = 0;
          if (boundingBoxOffset.count({gridX, gridY}))
          {
            s = boundingBoxOffset[{gridX, gridY}] + 1;
            boundingBoxOffset[{gridX, gridY}]++;
          }
          else
            boundingBoxOffset.insert({{gridX, gridY}, s});

          if (s >= numBoxes)
            continue;

          size_t bBoxOffset = (5 + numClasses) * s;
          outputTemp(arma::span(gridX), arma::span(gridY),
              arma::span(bBoxOffset, bBoxOffset + 1)) = centreCoordinates;
          outputTemp(arma::span(gridX), arma::span(gridY),
              arma::span(bBoxOffset + 2,
                  bBoxOffset + 3)) = widthAndHeight.col(i);
          outputTemp(gridX, gridY, bBoxOffset + 4) = 1.0;
          outputTemp(gridX, gridY, bBoxOffset + 5 + labels.col(i)(0)) = 1;
        }
      }
    }
  }
};

} // namespace models
} // namespace mlpack

#endif