% Conference paper, CVPR 2015 (venue per the DOI prefix 10.1109/CVPR.2015).
% booktitle is a required field for inproceedings entries; the page range is
% written as an unspaced double-hyphen range, matching entry 2033.
@inproceedings{1857,
  abstract     = {Sharing information between multiple tasks enables algorithms to achieve good generalization performance even from small amounts of training data. However, in a realistic scenario of multi-task learning not all tasks are equally related to each other, hence it could be advantageous to transfer information only between the most related tasks. In this work we propose an approach that processes multiple tasks in a sequence with sharing between subsequent tasks instead of solving all tasks jointly. Subsequently, we address the question of curriculum learning of tasks, i.e. finding the best order of tasks to be learned. Our approach is based on a generalization bound criterion for choosing the task order that optimizes the average expected classification performance over all tasks. Our experimental results show that learning multiple related tasks sequentially can be more effective than learning them jointly, the order in which tasks are being solved affects the overall performance, and that our model is able to automatically discover the favourable order of tasks. },
  author       = {Pentina, Anastasia and Sharmanska, Viktoriia and Lampert, Christoph},
  booktitle    = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  location     = {Boston, MA, United States},
  pages        = {5492--5500},
  publisher    = {IEEE},
  title        = {{Curriculum learning of multiple tasks}},
  doi          = {10.1109/CVPR.2015.7299188},
  year         = {2015},
}

% PhD thesis. Classic BibTeX styles require the school field for thesis
% entries and ignore publisher, so the institution is given in both fields.
@phdthesis{1401,
  abstract     = {The human ability to recognize objects in complex scenes has driven research in the computer vision field over couple of decades. This thesis focuses on the object recognition task in images. That is, given the image, we want the computer system to be able to predict the class of the object that appears in the image. A recent successful attempt to bridge semantic understanding of the image perceived by humans and by computers uses attribute-based models. Attributes are semantic properties of the objects shared across different categories, which humans and computers can decide on. To explore the attribute-based models we take a statistical machine learning approach, and address two key learning challenges in view of object recognition task: learning augmented attributes as mid-level discriminative feature representation, and learning with attributes as privileged information. Our main contributions are parametric and non-parametric models and algorithms to solve these frameworks. In the parametric approach, we explore an autoencoder model combined with the large margin nearest neighbor principle for mid-level feature learning, and linear support vector machines for learning with privileged information. In the non-parametric approach, we propose a supervised Indian Buffet Process for automatic augmentation of semantic attributes, and explore the Gaussian Processes classification framework for learning with privileged information. A thorough experimental analysis shows the effectiveness of the proposed models in both parametric and non-parametric views.},
  author       = {Sharmanska, Viktoriia},
  issn         = {2663-337X},
  pages        = {144},
  publisher    = {Institute of Science and Technology Austria},
  school       = {Institute of Science and Technology Austria},
  title        = {{Learning with attributes for object recognition: Parametric and non-parametrics views}},
  doi          = {10.15479/at:ista:1401},
  year         = {2015},
}

% Conference paper in the NeurIPS (NIPS) 2014 proceedings.
% The former number field held the word "January", which is not an issue or
% series number; standard styles also reject volume and number together on an
% inproceedings entry, so only volume is kept.
@inproceedings{2033,
  abstract     = {The learning with privileged information setting has recently attracted a lot of attention within the machine learning community, as it allows the integration of additional knowledge into the training process of a classifier, even when this comes in the form of a data modality that is not available at test time. Here, we show that privileged information can naturally be treated as noise in the latent function of a Gaussian process classifier (GPC). That is, in contrast to the standard GPC setting, the latent function is not just a nuisance but a feature: it becomes a natural measure of confidence about the training data by modulating the slope of the GPC probit likelihood function. Extensive experiments on public datasets show that the proposed GPC method using privileged noise, called GPC+, improves over a standard GPC without privileged knowledge, and also over the current state-of-the-art SVM-based method, SVM+. Moreover, we show that advanced neural networks and deep learning methods can be compressed as privileged information.},
  author       = {Hernandez Lobato, Daniel and Sharmanska, Viktoriia and Kersting, Kristian and Lampert, Christoph and Quadrianto, Novi},
  booktitle    = {Advances in Neural Information Processing Systems},
  location     = {Montreal, Canada},
  pages        = {837--845},
  publisher    = {Neural Information Processing Systems},
  title        = {{Mind the nuisance: Gaussian process classification using privileged noise}},
  volume       = {1},
  year         = {2014},
}

% Conference paper, ICCV 2013 (venue per the DOI prefix 10.1109/ICCV.2013).
% booktitle is a required field for inproceedings entries; the page range is
% written as an unspaced double-hyphen range, matching entry 2033.
@inproceedings{2293,
  abstract     = {Many computer vision problems have an asymmetric distribution of information between training and test time. In this work, we study the case where we are given additional information about the training data, which however will not be available at test time. This situation is called learning using privileged information (LUPI). We introduce two maximum-margin techniques that are able to make use of this additional source of information, and we show that the framework is applicable to several scenarios that have been studied in computer vision before. Experiments with attributes, bounding boxes, image tags and rationales as additional information in object classification show promising results.},
  author       = {Sharmanska, Viktoriia and Quadrianto, Novi and Lampert, Christoph},
  booktitle    = {{IEEE} International Conference on Computer Vision ({ICCV})},
  location     = {Sydney, Australia},
  pages        = {825--832},
  publisher    = {IEEE},
  title        = {{Learning to rank using privileged information}},
  doi          = {10.1109/ICCV.2013.107},
  year         = {2013},
}

% Conference paper, UAI 2013 (29th conference).
% The proceedings title is spelled out in full ("Conference on Uncertainty");
% the abstract is kept on one line like the other entries, and the page range
% is written as an unspaced double-hyphen range.
@inproceedings{2520,
  abstract     = {We propose a probabilistic model to infer supervised latent variables in the Hamming space from observed data. Our model allows simultaneous inference of the number of binary latent variables, and their values. The latent variables preserve neighbourhood structure of the data in a sense that objects in the same semantic concept have similar latent values, and objects in different concepts have dissimilar latent values. We formulate the supervised infinite latent variable problem based on an intuitive principle of pulling objects together if they are of the same type, and pushing them apart if they are not. We then combine this principle with a flexible Indian Buffet Process prior on the latent variables. We show that the inferred supervised latent variables can be directly used to perform a nearest neighbour search for the purpose of retrieval. We introduce a new application of dynamically extending hash codes, and show how to effectively couple the structure of the hash codes with continuously growing structure of the neighbourhood preserving infinite latent feature space.},
  author       = {Quadrianto, Novi and Sharmanska, Viktoriia and Knowles, David and Ghahramani, Zoubin},
  booktitle    = {Proceedings of the 29th Conference on Uncertainty in Artificial Intelligence},
  isbn         = {9780974903996},
  location     = {Bellevue, WA, United States},
  pages        = {527--536},
  publisher    = {AUAI Press},
  title        = {{The supervised IBP: Neighbourhood preserving infinite latent feature models}},
  year         = {2013},
}

% Conference paper, ECCV 2012, published in Springer LNCS volume 7576.
% booktitle is a required field for inproceedings entries. The former number
% field ("PART 5") is folded into the proceedings title, because standard
% styles reject volume and number together; series names what volume refers to.
@inproceedings{3125,
  abstract     = {We propose a new learning method to infer a mid-level feature representation that combines the advantage of semantic attribute representations with the higher expressive power of non-semantic features. The idea lies in augmenting an existing attribute-based representation with additional dimensions for which an autoencoder model is coupled with a large-margin principle. This construction allows a smooth transition between the zero-shot regime with no training example, the unsupervised regime with training examples but without class labels, and the supervised regime with training examples and with class labels. The resulting optimization problem can be solved efficiently, because several of the necessity steps have closed-form solutions. Through extensive experiments we show that the augmented representation achieves better results in terms of object categorization accuracy than the semantic representation alone.},
  author       = {Sharmanska, Viktoriia and Quadrianto, Novi and Lampert, Christoph},
  booktitle    = {Computer Vision -- {ECCV} 2012, Part {V}},
  location     = {Florence, Italy},
  pages        = {242--255},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  title        = {{Augmented attribute representations}},
  doi          = {10.1007/978-3-642-33715-4_18},
  volume       = {7576},
  year         = {2012},
}