[{"related_material":{"record":[{"status":"deleted","relation":"dissertation_contains","id":"8331"},{"relation":"dissertation_contains","status":"public","id":"8390"}]},"external_id":{"arxiv":["2004.12623"]},"year":"2020","conference":{"start_date":"2020-03-01","end_date":"2020-03-05","name":"WACV: Winter Conference on Applications of Computer Vision","location":" Snowmass Village, CO, United States"},"date_published":"2020-03-01T00:00:00Z","publication":"IEEE Winter Conference on Applications of Computer Vision","status":"public","type":"conference","_id":"7936","date_updated":"2023-09-07T13:16:17Z","publisher":"IEEE","article_processing_charge":"No","doi":"10.1109/WACV45572.2020.9093288","quality_controlled":"1","main_file_link":[{"url":"https://arxiv.org/abs/2004.12623","open_access":"1"}],"article_number":"1716-1725","department":[{"_id":"ChLa"}],"arxiv":1,"month":"03","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"ama":"Royer A, Lampert C. Localizing grouped instances for efficient detection in low-resource scenarios. In: <i>IEEE Winter Conference on Applications of Computer Vision</i>. IEEE; 2020. doi:<a href=\"https://doi.org/10.1109/WACV45572.2020.9093288\">10.1109/WACV45572.2020.9093288</a>","ieee":"A. Royer and C. Lampert, “Localizing grouped instances for efficient detection in low-resource scenarios,” in <i>IEEE Winter Conference on Applications of Computer Vision</i>,  Snowmass Village, CO, United States, 2020.","short":"A. Royer, C. Lampert, in:, IEEE Winter Conference on Applications of Computer Vision, IEEE, 2020.","ista":"Royer A, Lampert C. 2020. Localizing grouped instances for efficient detection in low-resource scenarios. IEEE Winter Conference on Applications of Computer Vision. WACV: Winter Conference on Applications of Computer Vision, 1716–1725.","chicago":"Royer, Amélie, and Christoph Lampert. 
“Localizing Grouped Instances for Efficient Detection in Low-Resource Scenarios.” In <i>IEEE Winter Conference on Applications of Computer Vision</i>. IEEE, 2020. <a href=\"https://doi.org/10.1109/WACV45572.2020.9093288\">https://doi.org/10.1109/WACV45572.2020.9093288</a>.","apa":"Royer, A., &#38; Lampert, C. (2020). Localizing grouped instances for efficient detection in low-resource scenarios. In <i>IEEE Winter Conference on Applications of Computer Vision</i>.  Snowmass Village, CO, United States: IEEE. <a href=\"https://doi.org/10.1109/WACV45572.2020.9093288\">https://doi.org/10.1109/WACV45572.2020.9093288</a>","mla":"Royer, Amélie, and Christoph Lampert. “Localizing Grouped Instances for Efficient Detection in Low-Resource Scenarios.” <i>IEEE Winter Conference on Applications of Computer Vision</i>, 1716–1725, IEEE, 2020, doi:<a href=\"https://doi.org/10.1109/WACV45572.2020.9093288\">10.1109/WACV45572.2020.9093288</a>."},"language":[{"iso":"eng"}],"oa":1,"date_created":"2020-06-07T22:00:53Z","title":"Localizing grouped instances for efficient detection in low-resource scenarios","oa_version":"Preprint","day":"01","scopus_import":1,"author":[{"first_name":"Amélie","orcid":"0000-0002-8407-0705","full_name":"Royer, Amélie","id":"3811D890-F248-11E8-B48F-1D18A9856A87","last_name":"Royer"},{"full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","orcid":"0000-0001-8622-7887","first_name":"Christoph"}],"publication_status":"published","publication_identifier":{"isbn":["9781728165530"]},"abstract":[{"lang":"eng","text":"State-of-the-art detection systems are generally evaluated on their ability to exhaustively retrieve objects densely distributed in the image, across a wide variety of appearances and semantic categories. 
Orthogonal to this, many real-life object detection applications, for example in remote sensing, instead require dealing with large images that contain only a few small objects of a single class, scattered heterogeneously across the space. In addition, they are often subject to strict computational constraints, such as limited battery capacity and computing power. To tackle these more practical scenarios, we propose a novel flexible detection scheme that efficiently adapts to variable object sizes and densities: We rely on a sequence of detection stages, each of which has the ability to predict groups of objects as well as individuals. Similar to a detection cascade, this multi-stage architecture spares computational effort by discarding large irrelevant regions of the image early during the detection process. The ability to group objects provides further computational and memory savings, as it allows working with lower image resolutions in early stages, where groups are more easily detected than individuals, as they are more salient. 
We report experimental results on two aerial image datasets, and show that the proposed method is as accurate yet computationally more efficient than standard single-shot detectors, consistently across three different backbone architectures."}]},{"quality_controlled":"1","main_file_link":[{"open_access":"1","url":"http://arxiv.org/abs/2008.11995"}],"type":"conference","date_updated":"2023-09-07T13:16:17Z","_id":"7937","publisher":"IEEE","doi":"10.1109/WACV45572.2020.9093635","article_processing_charge":"No","date_published":"2020-03-01T00:00:00Z","conference":{"name":"WACV: Winter Conference on Applications of Computer Vision","start_date":"2020-03-01","end_date":"2020-03-05","location":"Snowmass Village, CO, United States"},"status":"public","publication":"2020 IEEE Winter Conference on Applications of Computer Vision","external_id":{"arxiv":["2008.11995"]},"related_material":{"record":[{"id":"8331","relation":"dissertation_contains","status":"deleted"},{"relation":"dissertation_contains","status":"public","id":"8390"}]},"year":"2020","publication_status":"published","publication_identifier":{"isbn":["9781728165530"]},"abstract":[{"lang":"eng","text":"Fine-tuning is a popular way of exploiting knowledge contained in a pre-trained convolutional network for a new visual recognition task. However, the orthogonal setting of transferring knowledge from a pretrained network to a visually different yet semantically close source is rarely considered: This commonly happens with real-life data, which is not necessarily as clean as the training source (noise, geometric transformations, different modalities, etc.). To tackle such scenarios, we introduce a new, generalized form of fine-tuning, called flex-tuning, in which any individual unit (e.g. layer) of a network can be tuned, and the most promising one is chosen automatically. 
In order to make the method appealing for practical use, we propose two lightweight and faster selection procedures that prove to be good approximations in practice. We study these selection criteria empirically across a variety of domain shifts and data scarcity scenarios, and show that fine-tuning individual units, despite its simplicity, yields very good results as an adaptation technique. As it turns out, in contrast to common practice, rather than the last fully-connected unit it is best to tune an intermediate or early one in many domain-shift scenarios, which is accurately detected by flex-tuning."}],"date_created":"2020-06-07T22:00:53Z","oa_version":"Preprint","title":"A flexible selection scheme for minimum-effort transfer learning","author":[{"first_name":"Amélie","orcid":"0000-0002-8407-0705","last_name":"Royer","id":"3811D890-F248-11E8-B48F-1D18A9856A87","full_name":"Royer, Amélie"},{"first_name":"Christoph","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert"}],"day":"01","scopus_import":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"mla":"Royer, Amélie, and Christoph Lampert. “A Flexible Selection Scheme for Minimum-Effort Transfer Learning.” <i>2020 IEEE Winter Conference on Applications of Computer Vision</i>, 2180–2189, IEEE, 2020, doi:<a href=\"https://doi.org/10.1109/WACV45572.2020.9093635\">10.1109/WACV45572.2020.9093635</a>.","apa":"Royer, A., &#38; Lampert, C. (2020). A flexible selection scheme for minimum-effort transfer learning. In <i>2020 IEEE Winter Conference on Applications of Computer Vision</i>. Snowmass Village, CO, United States: IEEE. <a href=\"https://doi.org/10.1109/WACV45572.2020.9093635\">https://doi.org/10.1109/WACV45572.2020.9093635</a>","ista":"Royer A, Lampert C. 2020. A flexible selection scheme for minimum-effort transfer learning. 2020 IEEE Winter Conference on Applications of Computer Vision. 
WACV: Winter Conference on Applications of Computer Vision, 2180–2189.","chicago":"Royer, Amélie, and Christoph Lampert. “A Flexible Selection Scheme for Minimum-Effort Transfer Learning.” In <i>2020 IEEE Winter Conference on Applications of Computer Vision</i>. IEEE, 2020. <a href=\"https://doi.org/10.1109/WACV45572.2020.9093635\">https://doi.org/10.1109/WACV45572.2020.9093635</a>.","short":"A. Royer, C. Lampert, in:, 2020 IEEE Winter Conference on Applications of Computer Vision, IEEE, 2020.","ieee":"A. Royer and C. Lampert, “A flexible selection scheme for minimum-effort transfer learning,” in <i>2020 IEEE Winter Conference on Applications of Computer Vision</i>, Snowmass Village, CO, United States, 2020.","ama":"Royer A, Lampert C. A flexible selection scheme for minimum-effort transfer learning. In: <i>2020 IEEE Winter Conference on Applications of Computer Vision</i>. IEEE; 2020. doi:<a href=\"https://doi.org/10.1109/WACV45572.2020.9093635\">10.1109/WACV45572.2020.9093635</a>"},"language":[{"iso":"eng"}],"oa":1,"article_number":"2180-2189","department":[{"_id":"ChLa"}],"month":"03","arxiv":1},{"quality_controlled":"1","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/1711.05139"}],"page":"33-49","type":"book_chapter","date_updated":"2023-09-07T13:16:18Z","_id":"8092","editor":[{"full_name":"Singh, Richa","last_name":"Singh","first_name":"Richa"},{"last_name":"Vatsa","full_name":"Vatsa, Mayank","first_name":"Mayank"},{"last_name":"Patel","full_name":"Patel, Vishal M.","first_name":"Vishal M."},{"first_name":"Nalini","full_name":"Ratha, Nalini","last_name":"Ratha"}],"publisher":"Springer Nature","doi":"10.1007/978-3-030-30671-7_3","article_processing_charge":"No","date_published":"2020-01-08T00:00:00Z","status":"public","publication":"Domain Adaptation for Visual 
Understanding","external_id":{"arxiv":["1711.05139"]},"related_material":{"record":[{"id":"8331","relation":"dissertation_contains","status":"deleted"},{"relation":"dissertation_contains","status":"public","id":"8390"}]},"year":"2020","publication_status":"published","publication_identifier":{"isbn":["9783030306717"]},"abstract":[{"text":"Image translation refers to the task of mapping images from a visual domain to another. Given two unpaired collections of images, we aim to learn a mapping between the corpus-level style of each collection, while preserving semantic content shared across the two domains. We introduce xgan, a dual adversarial auto-encoder, which captures a shared representation of the common domain semantic content in an unsupervised way, while jointly learning the domain-to-domain image translations in both directions. We exploit ideas from the domain adaptation literature and define a semantic consistency loss which encourages the learned embedding to preserve semantics shared across domains. We report promising qualitative results for the task of face-to-cartoon translation. 
The cartoon dataset we collected for this purpose, “CartoonSet”, is also publicly available as a new benchmark for semantic style transfer at https://google.github.io/cartoonset/index.html.","lang":"eng"}],"date_created":"2020-07-05T22:00:46Z","oa_version":"Preprint","title":"XGAN: Unsupervised image-to-image translation for many-to-many mappings","author":[{"orcid":"0000-0002-8407-0705","first_name":"Amélie","last_name":"Royer","full_name":"Royer, Amélie","id":"3811D890-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Bousmalis","full_name":"Bousmalis, Konstantinos","first_name":"Konstantinos"},{"first_name":"Stephan","last_name":"Gouws","full_name":"Gouws, Stephan"},{"first_name":"Fred","full_name":"Bertsch, Fred","last_name":"Bertsch"},{"last_name":"Mosseri","full_name":"Mosseri, Inbar","first_name":"Inbar"},{"full_name":"Cole, Forrester","last_name":"Cole","first_name":"Forrester"},{"first_name":"Kevin","full_name":"Murphy, Kevin","last_name":"Murphy"}],"day":"08","scopus_import":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"ieee":"A. Royer <i>et al.</i>, “XGAN: Unsupervised image-to-image translation for many-to-many mappings,” in <i>Domain Adaptation for Visual Understanding</i>, R. Singh, M. Vatsa, V. M. Patel, and N. Ratha, Eds. Springer Nature, 2020, pp. 33–49.","short":"A. Royer, K. Bousmalis, S. Gouws, F. Bertsch, I. Mosseri, F. Cole, K. Murphy, in:, R. Singh, M. Vatsa, V.M. Patel, N. Ratha (Eds.), Domain Adaptation for Visual Understanding, Springer Nature, 2020, pp. 33–49.","ama":"Royer A, Bousmalis K, Gouws S, et al. XGAN: Unsupervised image-to-image translation for many-to-many mappings. In: Singh R, Vatsa M, Patel VM, Ratha N, eds. <i>Domain Adaptation for Visual Understanding</i>. Springer Nature; 2020:33-49. doi:<a href=\"https://doi.org/10.1007/978-3-030-30671-7_3\">10.1007/978-3-030-30671-7_3</a>","apa":"Royer, A., Bousmalis, K., Gouws, S., Bertsch, F., Mosseri, I., Cole, F., &#38; Murphy, K. (2020). 
XGAN: Unsupervised image-to-image translation for many-to-many mappings. In R. Singh, M. Vatsa, V. M. Patel, &#38; N. Ratha (Eds.), <i>Domain Adaptation for Visual Understanding</i> (pp. 33–49). Springer Nature. <a href=\"https://doi.org/10.1007/978-3-030-30671-7_3\">https://doi.org/10.1007/978-3-030-30671-7_3</a>","mla":"Royer, Amélie, et al. “XGAN: Unsupervised Image-to-Image Translation for Many-to-Many Mappings.” <i>Domain Adaptation for Visual Understanding</i>, edited by Richa Singh et al., Springer Nature, 2020, pp. 33–49, doi:<a href=\"https://doi.org/10.1007/978-3-030-30671-7_3\">10.1007/978-3-030-30671-7_3</a>.","chicago":"Royer, Amélie, Konstantinos Bousmalis, Stephan Gouws, Fred Bertsch, Inbar Mosseri, Forrester Cole, and Kevin Murphy. “XGAN: Unsupervised Image-to-Image Translation for Many-to-Many Mappings.” In <i>Domain Adaptation for Visual Understanding</i>, edited by Richa Singh, Mayank Vatsa, Vishal M. Patel, and Nalini Ratha, 33–49. Springer Nature, 2020. <a href=\"https://doi.org/10.1007/978-3-030-30671-7_3\">https://doi.org/10.1007/978-3-030-30671-7_3</a>.","ista":"Royer A, Bousmalis K, Gouws S, Bertsch F, Mosseri I, Cole F, Murphy K. 2020. XGAN: Unsupervised image-to-image translation for many-to-many mappings. In: Domain Adaptation for Visual Understanding. 
, 33–49."},"language":[{"iso":"eng"}],"oa":1,"department":[{"_id":"ChLa"}],"month":"01","arxiv":1},{"related_material":{"record":[{"id":"8390","relation":"dissertation_contains","status":"public"}]},"year":"2020","publication":"Proceedings of the 30th International Conference on Automated Planning and Scheduling","status":"public","project":[{"call_identifier":"FWF","name":"Game Theory","grant_number":"S11407","_id":"25863FF4-B435-11E9-9278-68D0E5697425"}],"conference":{"end_date":"2020-10-30","start_date":"2020-10-26","name":"ICAPS: International Conference on Automated Planning and Scheduling","location":"Nancy, France"},"acknowledgement":"Krishnendu Chatterjee is supported by the Austrian Science Fund (FWF) NFN Grant No. S11407-N23 (RiSE/SHiNE), and COST Action GAMENET. Petr Novotný is supported by the Czech Science Foundation grant No. GJ19-15134Y.","date_published":"2020-06-01T00:00:00Z","publisher":"Association for the Advancement of Artificial Intelligence","article_processing_charge":"No","type":"conference","_id":"8193","date_updated":"2023-09-07T13:16:18Z","page":"48-56","quality_controlled":"1","month":"06","department":[{"_id":"KrCh"}],"language":[{"iso":"eng"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"short":"K. Chatterjee, M. Chmelik, D. Karkhanis, P. Novotný, A. Royer, in:, Proceedings of the 30th International Conference on Automated Planning and Scheduling, Association for the Advancement of Artificial Intelligence, 2020, pp. 48–56.","ieee":"K. Chatterjee, M. Chmelik, D. Karkhanis, P. Novotný, and A. Royer, “Multiple-environment Markov decision processes: Efficient analysis and applications,” in <i>Proceedings of the 30th International Conference on Automated Planning and Scheduling</i>, Nancy, France, 2020, vol. 30, pp. 48–56.","ama":"Chatterjee K, Chmelik M, Karkhanis D, Novotný P, Royer A. Multiple-environment Markov decision processes: Efficient analysis and applications. 
In: <i>Proceedings of the 30th International Conference on Automated Planning and Scheduling</i>. Vol 30. Association for the Advancement of Artificial Intelligence; 2020:48-56.","mla":"Chatterjee, Krishnendu, et al. “Multiple-Environment Markov Decision Processes: Efficient Analysis and Applications.” <i>Proceedings of the 30th International Conference on Automated Planning and Scheduling</i>, vol. 30, Association for the Advancement of Artificial Intelligence, 2020, pp. 48–56.","apa":"Chatterjee, K., Chmelik, M., Karkhanis, D., Novotný, P., &#38; Royer, A. (2020). Multiple-environment Markov decision processes: Efficient analysis and applications. In <i>Proceedings of the 30th International Conference on Automated Planning and Scheduling</i> (Vol. 30, pp. 48–56). Nancy, France: Association for the Advancement of Artificial Intelligence.","chicago":"Chatterjee, Krishnendu, Martin Chmelik, Deep Karkhanis, Petr Novotný, and Amélie Royer. “Multiple-Environment Markov Decision Processes: Efficient Analysis and Applications.” In <i>Proceedings of the 30th International Conference on Automated Planning and Scheduling</i>, 30:48–56. Association for the Advancement of Artificial Intelligence, 2020.","ista":"Chatterjee K, Chmelik M, Karkhanis D, Novotný P, Royer A. 2020. Multiple-environment Markov decision processes: Efficient analysis and applications. Proceedings of the 30th International Conference on Automated Planning and Scheduling. ICAPS: International Conference on Automated Planning and Scheduling vol. 
30, 48–56."},"oa_version":"None","title":"Multiple-environment Markov decision processes: Efficient analysis and applications","day":"01","scopus_import":"1","author":[{"last_name":"Chatterjee","full_name":"Chatterjee, Krishnendu","id":"2E5DCA20-F248-11E8-B48F-1D18A9856A87","first_name":"Krishnendu","orcid":"0000-0002-4561-241X"},{"first_name":"Martin","last_name":"Chmelik","id":"3624234E-F248-11E8-B48F-1D18A9856A87","full_name":"Chmelik, Martin"},{"first_name":"Deep","full_name":"Karkhanis, Deep","last_name":"Karkhanis"},{"full_name":"Novotný, Petr","id":"3CC3B868-F248-11E8-B48F-1D18A9856A87","last_name":"Novotný","first_name":"Petr"},{"first_name":"Amélie","orcid":"0000-0002-8407-0705","last_name":"Royer","id":"3811D890-F248-11E8-B48F-1D18A9856A87","full_name":"Royer, Amélie"}],"date_created":"2020-08-02T22:00:58Z","volume":30,"intvolume":"30","abstract":[{"lang":"eng","text":"Multiple-environment Markov decision processes (MEMDPs) are MDPs equipped with not one, but multiple probabilistic transition functions, which represent the various possible unknown environments. While the previous research on MEMDPs focused on theoretical properties for long-run average payoff, we study them with discounted-sum payoff and focus on their practical advantages and applications. MEMDPs can be viewed as a special case of Partially observable and Mixed observability MDPs: the state of the system is perfectly observable, but not the environment. We show that the specific structure of MEMDPs allows for more efficient algorithmic analysis, in particular for faster belief updates. We demonstrate the applicability of MEMDPs in several domains. 
In particular, we formalize the sequential decision-making approach to contextual recommendation systems as MEMDPs and substantially improve over the previous MDP approach."}],"publication_status":"published","publication_identifier":{"issn":["23340835"],"eissn":["23340843"]}},{"date_published":"2020-09-14T00:00:00Z","acknowledgement":"Last but not least, I would like to acknowledge the support of the IST IT and scientific computing team for helping provide a great work environment.","degree_awarded":"PhD","status":"public","year":"2020","related_material":{"record":[{"id":"7936","relation":"part_of_dissertation","status":"public"},{"id":"7937","status":"public","relation":"part_of_dissertation"},{"id":"8193","relation":"part_of_dissertation","status":"public"},{"id":"8092","status":"public","relation":"part_of_dissertation"},{"status":"public","relation":"part_of_dissertation","id":"911"}]},"page":"197","ddc":["000"],"_id":"8390","date_updated":"2023-10-16T10:04:02Z","type":"dissertation","alternative_title":["ISTA Thesis"],"article_processing_charge":"No","doi":"10.15479/AT:ISTA:8390","publisher":"Institute of Science and Technology Austria","citation":{"ama":"Royer A. Leveraging structure in Computer Vision tasks for flexible Deep Learning models. 2020. doi:<a href=\"https://doi.org/10.15479/AT:ISTA:8390\">10.15479/AT:ISTA:8390</a>","short":"A. Royer, Leveraging Structure in Computer Vision Tasks for Flexible Deep Learning Models, Institute of Science and Technology Austria, 2020.","ieee":"A. Royer, “Leveraging structure in Computer Vision tasks for flexible Deep Learning models,” Institute of Science and Technology Austria, 2020.","ista":"Royer A. 2020. Leveraging structure in Computer Vision tasks for flexible Deep Learning models. Institute of Science and Technology Austria.","chicago":"Royer, Amélie. “Leveraging Structure in Computer Vision Tasks for Flexible Deep Learning Models.” Institute of Science and Technology Austria, 2020. 
<a href=\"https://doi.org/10.15479/AT:ISTA:8390\">https://doi.org/10.15479/AT:ISTA:8390</a>.","mla":"Royer, Amélie. <i>Leveraging Structure in Computer Vision Tasks for Flexible Deep Learning Models</i>. Institute of Science and Technology Austria, 2020, doi:<a href=\"https://doi.org/10.15479/AT:ISTA:8390\">10.15479/AT:ISTA:8390</a>.","apa":"Royer, A. (2020). <i>Leveraging structure in Computer Vision tasks for flexible Deep Learning models</i>. Institute of Science and Technology Austria. <a href=\"https://doi.org/10.15479/AT:ISTA:8390\">https://doi.org/10.15479/AT:ISTA:8390</a>"},"user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","oa":1,"language":[{"iso":"eng"}],"department":[{"_id":"ChLa"}],"file":[{"content_type":"application/pdf","access_level":"open_access","file_name":"2020_Thesis_Royer.pdf","success":1,"checksum":"c914d2f88846032f3d8507734861b6ee","relation":"main_file","date_updated":"2020-09-14T13:39:14Z","creator":"dernst","date_created":"2020-09-14T13:39:14Z","file_size":30224591,"file_id":"8391"},{"relation":"main_file","checksum":"ae98fb35d912cff84a89035ae5794d3c","file_name":"thesis_sources.zip","access_level":"closed","content_type":"application/x-zip-compressed","file_id":"8392","file_size":74227627,"date_created":"2020-09-14T13:39:17Z","creator":"dernst","date_updated":"2020-09-14T13:39:17Z"}],"supervisor":[{"orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert","full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"month":"09","file_date_updated":"2020-09-14T13:39:17Z","publication_status":"published","publication_identifier":{"issn":["2663-337X"],"isbn":["978-3-99078-007-7"]},"has_accepted_license":"1","acknowledged_ssus":[{"_id":"CampIT"},{"_id":"ScienComp"}],"license":"https://creativecommons.org/licenses/by-nc-sa/4.0/","abstract":[{"lang":"eng","text":"Deep neural networks have established a new standard for data-dependent feature extraction pipelines in the Computer Vision literature. 
Despite their remarkable performance in the standard supervised learning scenario, i.e. when models are trained with labeled data and tested on samples that follow a similar distribution, neural networks have been shown to struggle with more advanced generalization abilities, such as transferring knowledge across visually different domains, or generalizing to new unseen combinations of known concepts. In this thesis we argue that, in contrast to the usual black-box behavior of neural networks, leveraging more structured internal representations is a promising direction\r\nfor tackling such problems. In particular, we focus on two forms of structure. First, we tackle modularity: We show that (i) compositional architectures are a natural tool for modeling reasoning tasks, in that they efficiently capture their combinatorial nature, which is key for generalizing beyond the compositions seen during training. We investigate how to learn such models, both formally and experimentally, for the task of abstract visual reasoning. Then, we show that (ii) in some settings, modularity allows us to efficiently break down complex tasks into smaller, easier, modules, thereby improving computational efficiency; We study this behavior in the context of generative models for colorization, as well as for small objects detection. Secondly, we investigate the inherently layered structure of representations learned by neural networks, and analyze its role in the context of transfer learning and domain adaptation across visually\r\ndissimilar domains. 
"}],"tmp":{"image":"/images/cc_by_nc_sa.png","name":"Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)","legal_code_url":"https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode","short":"CC BY-NC-SA (4.0)"},"date_created":"2020-09-14T13:42:09Z","day":"14","author":[{"last_name":"Royer","id":"3811D890-F248-11E8-B48F-1D18A9856A87","full_name":"Royer, Amélie","orcid":"0000-0002-8407-0705","first_name":"Amélie"}],"oa_version":"Published Version","title":"Leveraging structure in Computer Vision tasks for flexible Deep Learning models"},{"quality_controlled":"1","page":"85.1-85.12","ddc":["000"],"date_updated":"2023-10-16T10:04:02Z","_id":"911","type":"conference","doi":"10.5244/c.31.85","article_processing_charge":"No","publisher":"BMVA Press","ec_funded":1,"conference":{"name":"BMVC: British Machine Vision Conference","end_date":"2017-09-07","start_date":"2017-09-04","location":"London, United Kingdom"},"date_published":"2017-09-01T00:00:00Z","project":[{"call_identifier":"FP7","grant_number":"308036","name":"Lifelong Learning of Visual Scene Understanding","_id":"2532554C-B435-11E9-9278-68D0E5697425"}],"status":"public","publist_id":"6532","year":"2017","external_id":{"arxiv":["1705.04258"]},"related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"8390"}]},"publication_identifier":{"eisbn":["190172560X"]},"publication_status":"published","file_date_updated":"2020-08-10T07:14:33Z","has_accepted_license":"1","abstract":[{"text":"We develop a probabilistic technique for colorizing grayscale natural images. In light of the intrinsic uncertainty of this task, the proposed probabilistic framework has numerous desirable properties. In particular, our model is able to produce multiple plausible and vivid colorizations for a given grayscale image and is one of the first colorization models to provide a proper stochastic sampling scheme. 
Moreover, our training procedure is supported by a rigorous theoretical framework that does not require any ad hoc heuristics and allows for efficient modeling and learning of the joint pixel color distribution.We demonstrate strong quantitative and qualitative experimental results on the CIFAR-10 dataset and the challenging ILSVRC 2012 dataset.","lang":"eng"}],"date_created":"2018-12-11T11:49:09Z","author":[{"first_name":"Amélie","orcid":"0000-0002-8407-0705","full_name":"Royer, Amélie","id":"3811D890-F248-11E8-B48F-1D18A9856A87","last_name":"Royer"},{"full_name":"Kolesnikov, Alexander","id":"2D157DB6-F248-11E8-B48F-1D18A9856A87","last_name":"Kolesnikov","first_name":"Alexander"},{"first_name":"Christoph","orcid":"0000-0001-8622-7887","last_name":"Lampert","full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"scopus_import":"1","day":"01","title":"Probabilistic image colorization","oa_version":"Published Version","citation":{"mla":"Royer, Amélie, et al. <i>Probabilistic Image Colorization</i>. BMVA Press, 2017, p. 85.1-85.12, doi:<a href=\"https://doi.org/10.5244/c.31.85\">10.5244/c.31.85</a>.","apa":"Royer, A., Kolesnikov, A., &#38; Lampert, C. (2017). Probabilistic image colorization (p. 85.1-85.12). Presented at the BMVC: British Machine Vision Conference, London, United Kingdom: BMVA Press. <a href=\"https://doi.org/10.5244/c.31.85\">https://doi.org/10.5244/c.31.85</a>","ista":"Royer A, Kolesnikov A, Lampert C. 2017. Probabilistic image colorization. BMVC: British Machine Vision Conference, 85.1-85.12.","chicago":"Royer, Amélie, Alexander Kolesnikov, and Christoph Lampert. “Probabilistic Image Colorization,” 85.1-85.12. BMVA Press, 2017. <a href=\"https://doi.org/10.5244/c.31.85\">https://doi.org/10.5244/c.31.85</a>.","short":"A. Royer, A. Kolesnikov, C. Lampert, in:, BMVA Press, 2017, p. 85.1-85.12.","ieee":"A. Royer, A. Kolesnikov, and C. 
Lampert, “Probabilistic image colorization,” presented at the BMVC: British Machine Vision Conference, London, United Kingdom, 2017, p. 85.1-85.12.","ama":"Royer A, Kolesnikov A, Lampert C. Probabilistic image colorization. In: BMVA Press; 2017:85.1-85.12. doi:<a href=\"https://doi.org/10.5244/c.31.85\">10.5244/c.31.85</a>"},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa":1,"language":[{"iso":"eng"}],"department":[{"_id":"ChLa"}],"file":[{"relation":"main_file","file_name":"2017_BMVC_Royer.pdf","success":1,"content_type":"application/pdf","access_level":"open_access","file_id":"8224","file_size":1625363,"date_created":"2020-08-10T07:14:33Z","creator":"dernst","date_updated":"2020-08-10T07:14:33Z"}],"arxiv":1,"month":"09"}]
