OUR RESEARCH
Scientific Publications
Here you can find the comprehensive list of publications from the members of the Research Center on Computer Vision and eXtended Reality (xRAI).
Use the tag cloud to filter papers based on specific research topics, or use the menus to filter by year, type of publication, or authors.
For each paper, you can view additional details such as the Abstract, Links, and BibTeX record.
Research is formalized curiosity. It is poking and prying with a purpose.
Zora Neale Hurston
2025
Maggioli, Filippo; Baieri, Daniele; Rodolà, Emanuele; Melzi, Simone
ReMatching: Low-Resolution Representations for Scalable Shape Correspondence Proceedings Article
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, pp. 183–200, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-72912-6, 978-3-031-72913-3.
Abstract | Links | BibTeX | Tags: Computer graphics, Computer Vision and Pattern Recognition, Geometry Processing, Shape Analysis, Shape Matching, Spectral Geometry
@inproceedings{maggioli_rematching_2025,
title = {ReMatching: Low-Resolution Representations for Scalable Shape Correspondence},
author = {Filippo Maggioli and Daniele Baieri and Emanuele Rodolà and Simone Melzi},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72913-3_11},
doi = {10.1007/978-3-031-72913-3_11},
isbn = {978-3-031-72912-6, 978-3-031-72913-3},
year = {2025},
date = {2025-01-01},
urldate = {2025-03-30},
booktitle = {Computer Vision – ECCV 2024},
volume = {15095},
pages = {183–200},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {We introduce ReMatching, a novel shape correspondence solution based on the functional maps framework. By exploiting a new and appropriate re-meshing paradigm, our method can target shape-matching tasks even on meshes with millions of vertices, where the original functional maps framework does not apply or requires a massive computational cost. The core of our procedure is a time-efficient remeshing algorithm which constructs a low-resolution geometry while acting conservatively on the original topology and metric. These properties allow translating the functional maps optimization problem to the resulting low-resolution representation, thus enabling efficient computation of correspondences with functional map approaches. Finally, we propose an efficient technique for extending the estimated correspondence to the original meshes. Through quantitative and qualitative comparisons, we show that our method is more efficient and effective, outperforming state-of-the-art pipelines in quality and computational cost.},
keywords = {Computer graphics, Computer Vision and Pattern Recognition, Geometry Processing, Shape Analysis, Shape Matching, Spectral Geometry},
pubstate = {published},
tppubtype = {inproceedings}
}
2024
Agostinelli, Thomas; Generosi, Andrea; Ceccacci, Silvia; Mengoni, Maura
Validation of computer vision-based ergonomic risk assessment tools for real manufacturing environments Journal Article
In: Scientific Reports, vol. 14, no. 1, pp. 27785, 2024, ISSN: 2045-2322.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Human-Centered Design, Industry 4.0
@article{agostinelli_validation_2024,
title = {Validation of computer vision-based ergonomic risk assessment tools for real manufacturing environments},
author = {Thomas Agostinelli and Andrea Generosi and Silvia Ceccacci and Maura Mengoni},
url = {https://www.nature.com/articles/s41598-024-79373-4},
doi = {10.1038/s41598-024-79373-4},
issn = {2045-2322},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-28},
journal = {Scientific Reports},
volume = {14},
number = {1},
pages = {27785},
abstract = {This study contributes to understanding semi-automated ergonomic risk assessments in industrial manufacturing environments, proposing a practical tool for enhancing worker safety and operational efficiency. In the Industry 5.0 era, the human-centric approach in manufacturing is crucial, especially considering the aging workforce and the dynamic nature of the entire modern industrial sector, today integrating digital technology, automation, and sustainable practices to enhance productivity and environmental responsibility. This approach aims to adapt work conditions to individual capabilities, addressing the high incidence of work-related musculoskeletal disorders (MSDs). The traditional, subjective methods of ergonomic assessment are inadequate for dynamic settings, highlighting the need for affordable, automatic tools for continuous monitoring of workers’ postures to evaluate ergonomic risks effectively during tasks. To enable this perspective, 2D RGB Motion Capture (MoCap) systems based on computer vision currently seem the technologies of choice, given their low intrusiveness, cost, and implementation effort. However, the reliability and applicability of these systems in the dynamic and varied manufacturing environment remain uncertain. This research benchmarks various MoCap tools proposed in the literature and examines the viability of MoCap systems for ergonomic risk assessments in Industry 5.0 by exploiting one of the benchmarked semi-automated, low-cost and non-intrusive 2D RGB MoCap systems, capable of continuously monitoring and analysing workers’ postures. By conducting experiments across varied manufacturing environments, this research evaluates the system’s effectiveness in assessing ergonomic risks and its adaptability to different production lines. Results reveal that the accuracy of risk assessments varies by specific environmental conditions and workstation setups. Although these systems are not yet optimized for expert-level risk certification, they offer significant potential for enhancing workplace safety and efficiency by providing continuous posture monitoring. Future improvements could explore advanced computational techniques like machine learning to refine ergonomic assessments further.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Human-Centered Design, Industry 4.0},
pubstate = {published},
tppubtype = {article}
}
Jamali, Reza; Generosi, Andrea; Villafan, Josè Yuri; Mengoni, Maura; Pelagalli, Leonardo; Battista, Gianmarco; Martarelli, Milena; Chiariotti, Paolo; Mansi, Silvia Angela; Arnesano, Marco; Castellini, Paolo
Facial Expression Recognition for Measuring Jurors’ Attention in Acoustic Jury Tests Journal Article
In: Sensors, vol. 24, no. 7, pp. 2298, 2024, ISSN: 1424-8220.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Emotion Recognition
@article{jamali_facial_2024,
title = {Facial Expression Recognition for Measuring Jurors’ Attention in Acoustic Jury Tests},
author = {Reza Jamali and Andrea Generosi and Josè Yuri Villafan and Maura Mengoni and Leonardo Pelagalli and Gianmarco Battista and Milena Martarelli and Paolo Chiariotti and Silvia Angela Mansi and Marco Arnesano and Paolo Castellini},
url = {https://www.mdpi.com/1424-8220/24/7/2298},
doi = {10.3390/s24072298},
issn = {1424-8220},
year = {2024},
date = {2024-04-01},
urldate = {2024-12-28},
journal = {Sensors},
volume = {24},
number = {7},
pages = {2298},
abstract = {The perception of sound greatly impacts users’ emotional states, expectations, affective relationships with products, and purchase decisions. Consequently, assessing the perceived quality of sounds through jury testing is crucial in product design. However, the subjective nature of jurors’ responses may limit the accuracy and reliability of jury test outcomes. This research explores the utility of facial expression analysis in jury testing to enhance response reliability and mitigate subjectivity. Some quantitative indicators allow the research hypothesis to be validated, such as the correlation between jurors’ emotional responses and valence values, the accuracy of jury tests, and the disparities between jurors’ questionnaire responses and the emotions measured by FER (facial expression recognition). Specifically, analysis of attention levels during different statuses reveals a discernible decrease in attention levels, with 70 percent of jurors exhibiting reduced attention levels in the ‘distracted’ state and 62 percent in the ‘heavy-eyed’ state. On the other hand, regression analysis shows that the correlation between jurors’ valence and their choices in the jury test increases when considering the data where the jurors are attentive. The correlation highlights the potential of facial expression analysis as a reliable tool for assessing juror engagement. The findings suggest that integrating facial expression recognition can enhance the accuracy of jury testing in product design by providing a more dependable assessment of user responses and deeper insights into participants’ reactions to auditory stimuli.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Emotion Recognition},
pubstate = {published},
tppubtype = {article}
}
Agnolucci, Lorenzo; Galteri, Leonardo; Bertini, Marco; Bimbo, Alberto Del
ARNIQA: Learning distortion manifold for image quality assessment Proceedings Article
In: Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision, pp. 189–198, 2024.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Image Representation, No-Reference Image Quality Assessment (NR-IQA), Quality Assessment
@inproceedings{agnolucciArniqaLearningDistortion2024,
title = {ARNIQA: Learning distortion manifold for image quality assessment},
author = {Lorenzo Agnolucci and Leonardo Galteri and Marco Bertini and Alberto Del Bimbo},
url = {https://openaccess.thecvf.com/content/WACV2024/papers/Agnolucci_ARNIQA_Learning_Distortion_Manifold_for_Image_Quality_Assessment_WACV_2024_paper.pdf},
doi = {10.1109/WACV57701.2024.00026},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages = {189–198},
abstract = {No-Reference Image Quality Assessment (NR-IQA) aims to develop methods to measure image quality in alignment with human perception without the need for a high-quality reference image. In this work, we propose a self-supervised approach named ARNIQA (leArning distoRtion maNifold for Image Quality Assessment) for modeling the image distortion manifold to obtain quality representations in an intrinsic manner. First, we introduce an image degradation model that randomly composes ordered sequences of consecutively applied distortions. In this way, we can synthetically degrade images with a large variety of degradation patterns. Second, we propose to train our model by maximizing the similarity between the representations of patches of different images distorted equally, despite varying content. Thus, images degraded in the same manner correspond to neighboring positions within the distortion manifold. Finally, we map the image representations to the quality scores with a simple linear regressor, thus without fine-tuning the encoder weights. The experiments show that our approach achieves state-of-the-art performance on several datasets. In addition, ARNIQA demonstrates improved data efficiency, generalization capabilities, and robustness compared to competing methods. The code and the model are publicly available at https://github.com/miccunifi/ARNIQA.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Image Representation, No-Reference Image Quality Assessment (NR-IQA), Quality Assessment},
pubstate = {published},
tppubtype = {inproceedings}
}
Agnolucci, Lorenzo; Galteri, Leonardo; Bertini, Marco; Bimbo, Alberto Del
Reference-based restoration of digitized analog videotapes Proceedings Article
In: Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision, pp. 1659–1668, 2024.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Digital Archiving, Image Processing, Transformer Networks
@inproceedings{agnolucciReferencebasedRestorationDigitized2024,
title = {Reference-based restoration of digitized analog videotapes},
author = {Lorenzo Agnolucci and Leonardo Galteri and Marco Bertini and Alberto Del Bimbo},
url = {https://openaccess.thecvf.com/content/WACV2024/papers/Agnolucci_Reference-Based_Restoration_of_Digitized_Analog_Videotapes_WACV_2024_paper.pdf},
doi = {10.1109/WACV57701.2024.00168},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages = {1659–1668},
abstract = {Analog magnetic tapes have been the main video data storage device for several decades. Videos stored on analog videotapes exhibit unique degradation patterns caused by tape aging and reader device malfunctioning that are different from those observed in film and digital video restoration tasks. In this work, we present a reference-based approach for the resToration of digitized Analog videotaPEs (TAPE). We leverage CLIP for zero-shot artifact detection to identify the cleanest frames of each video through textual prompts describing different artifacts. Then, we select the clean frames most similar to the input ones and employ them as references. We design a transformer-based Swin-UNet network that exploits both neighboring and reference frames via our Multi-Reference Spatial Feature Fusion (MRSFF) blocks. MRSFF blocks rely on cross-attention and attention pooling to take advantage of the most useful parts of each reference frame. To address the absence of ground truth in real-world videos, we create a synthetic dataset of videos exhibiting artifacts that closely resemble those commonly found in analog videotapes. Both quantitative and qualitative experiments show the effectiveness of our approach compared to other state-of-the-art methods. The code, the model, and the synthetic dataset are publicly available at https://github.com/miccunifi/TAPE.},
keywords = {Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Digital Archiving, Image Processing, Transformer Networks},
pubstate = {published},
tppubtype = {inproceedings}
}
Agnolucci, Lorenzo; Galteri, Leonardo; Bertini, Marco
Quality-Aware Image-Text Alignment for Real-World Image Quality Assessment Journal Article
In: arXiv preprint arXiv:2403.11176, 2024.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Image Processing, Quality Assessment, Self-Supervised Learning, Vision and Language
@article{agnolucciQualityAwareImageTextAlignment2024,
title = {Quality-Aware Image-Text Alignment for Real-World Image Quality Assessment},
author = {Lorenzo Agnolucci and Leonardo Galteri and Marco Bertini},
url = {https://arxiv.org/abs/2403.11176},
doi = {10.48550/ARXIV.2403.11176},
year = {2024},
date = {2024-01-01},
journal = {arXiv preprint arXiv:2403.11176},
abstract = {No-Reference Image Quality Assessment (NR-IQA) focuses on designing methods to measure image quality in alignment with human perception when a high-quality reference image is unavailable. The reliance on annotated Mean Opinion Scores (MOS) in the majority of state-of-the-art NR-IQA approaches limits their scalability and broader applicability to real-world scenarios. To overcome this limitation, we propose QualiCLIP (Quality-aware CLIP), a CLIP-based self-supervised opinion-unaware method that does not require labeled MOS. In particular, we introduce a quality-aware image-text alignment strategy to make CLIP generate representations that correlate with the inherent quality of the images. Starting from pristine images, we synthetically degrade them with increasing levels of intensity. Then, we train CLIP to rank these degraded images based on their similarity to quality-related antonym text prompts, while guaranteeing consistent representations for images with comparable quality. Our method achieves state-of-the-art performance on several datasets with authentic distortions. Moreover, despite not requiring MOS, QualiCLIP outperforms supervised methods when their training dataset differs from the testing one, thus proving to be more suitable for real-world scenarios. Furthermore, our approach demonstrates greater robustness and improved explainability than competing methods. The code and the model are publicly available at https://github.com/miccunifi/QualiCLIP.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Image Processing, Quality Assessment, Self-Supervised Learning, Vision and Language},
pubstate = {published},
tppubtype = {article}
}
Mengoni, Maura; Ceccacci, Silvia; Generosi, Andrea
Emotion Recognition and Affective Computing Book Section
In: Interaction Techniques and Technologies in Human-Computer Interaction, CRC Press, 2024, ISBN: 9781003490678.
Abstract | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Emotion Recognition
@incollection{mengoni_emotion_2024,
title = {Emotion Recognition and Affective Computing},
author = {Maura Mengoni and Silvia Ceccacci and Andrea Generosi},
isbn = {9781003490678},
year = {2024},
date = {2024-01-01},
booktitle = {Interaction Techniques and Technologies in Human-Computer Interaction},
publisher = {CRC Press},
abstract = {This chapter explores the challenging topic of emotion recognition by affective computing. The importance of considering and understanding people’s emotions in interaction design is discussed, focusing on the role of human emotions in the entire life cycle of human–system interaction as a means to innovate products and services. The measurement of emotions is also analyzed, including the classification of human emotions and recognition methods, as well as current techniques for measuring emotional responses. An emotional-based approach and related technologies are considered in managing the entire life cycle of human–system interaction as an innovation driver. This chapter also presents how to use affective computing in cross-transversal applications, concentrating on potential applications and different case studies. This chapter concludes with a look towards a world of emotional intelligence, where affective computing plays a crucial role in collecting and analyzing emotional data to support innovative product and service experiences.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Emotion Recognition},
pubstate = {published},
tppubtype = {incollection}
}
Generosi, Andrea; Villafan, Josè Yuri; Montanari, Roberto; Mengoni, Maura
A Multimodal Approach to Understand Driver’s Distraction for DMS Proceedings Article
In: Antona, Margherita; Stephanidis, Constantine (Ed.): Universal Access in Human-Computer Interaction, pp. 250–270, Springer Nature Switzerland, Cham, 2024, ISBN: 9783031608759.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Human Computer Interaction
@inproceedings{generosi_multimodal_2024,
title = {A Multimodal Approach to Understand Driver’s Distraction for DMS},
author = {Andrea Generosi and Josè Yuri Villafan and Roberto Montanari and Maura Mengoni},
editor = {Margherita Antona and Constantine Stephanidis},
doi = {10.1007/978-3-031-60875-9_17},
isbn = {9783031608759},
year = {2024},
date = {2024-01-01},
booktitle = {Universal Access in Human-Computer Interaction},
pages = {250–270},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {This study introduces a multimodal approach for enhancing the accuracy of Driver Monitoring Systems (DMS) in detecting driver distraction. By integrating data from vehicle control units with vision-based information, the research aims to address the limitations of current DMS. The experimental setup involves a driving simulator and advanced computer vision, deep learning technologies for facial expression recognition, and head rotation analysis. The findings suggest that combining various data types—behavioral, physiological, and emotional—can significantly improve DMS’s predictive capability. This research contributes to the development of more sophisticated, adaptive, and real-time systems for improving driver safety and advancing autonomous driving technologies.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Human Computer Interaction},
pubstate = {published},
tppubtype = {inproceedings}
}
Maccarone, Francesca; Longari, Giorgio; Viganò, Giulio; Peruzzo, Denis; Maggioli, Filippo; Melzi, Simone
S4A: Scalable Spectral Statistical Shape Analysis Proceedings Article
In: Smart Tools and Applications in Graphics - Eurographics Italian Chapter Conference, The Eurographics Association, 2024, ISBN: 978-3-03868-265-3.
Abstract | Links | BibTeX | Tags: Computational Geometry, Computer graphics, Computer Vision and Pattern Recognition, Healthcare, Shape Analysis, Shape Matching, Spectral Geometry
@inproceedings{maccarone_s4a_2024,
title = {S4A: Scalable Spectral Statistical Shape Analysis},
author = {Francesca Maccarone and Giorgio Longari and Giulio Viganò and Denis Peruzzo and Filippo Maggioli and Simone Melzi},
url = {https://diglib.eg.org/handle/10.2312/stag20241343},
doi = {10.2312/STAG.20241343},
isbn = {978-3-03868-265-3},
year = {2024},
date = {2024-01-01},
urldate = {2025-03-30},
booktitle = {Smart Tools and Applications in Graphics - Eurographics Italian Chapter Conference},
publisher = {The Eurographics Association},
abstract = {Statistical shape analysis is a crucial technique for studying deformations within collections of shapes, particularly in the field of Medical Imaging. However, the high density of meshes typically used to represent medical data poses a challenge for standard geometry processing tools due to their limited efficiency. While spectral approaches offer a promising solution by effectively handling high-frequency variations inherent in such data, their scalability is questioned by their need to solve eigendecompositions of large sparse matrices. In this paper, we introduce S4A, a novel and efficient method based on spectral geometry processing, that addresses these issues with a low computational cost. It operates in four stages: (i) establishing correspondences between each pair of shapes in the collection, (ii) defining a common latent space to encode deformations across the entire collection, (iii) computing statistical quantities to identify, highlight, and measure the most representative variations within the collection, and (iv) performing information transfer from labeled data to large collections of shapes. Unlike previous methods, S4A provides a highly efficient solution across all stages of the process. We demonstrate the advantages of our approach by comparing its accuracy and computational efficiency to existing pipelines, and by showcasing the comprehensive statistical insights that can be derived from applying our method to a collection of medical data.},
keywords = {Computational Geometry, Computer graphics, Computer Vision and Pattern Recognition, Healthcare, Shape Analysis, Shape Matching, Spectral Geometry},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
Maggioli, Filippo; Klein, Jonathan; Hädrich, Torsten; Rodolà, Emanuele; Pałubicki, Wojtek; Pirk, Sören; Michels, Dominik L.
A Physically-inspired Approach to the Simulation of Plant Wilting Proceedings Article
In: SIGGRAPH Asia 2023 Conference Papers, pp. 1–8, ACM, Sydney NSW Australia, 2023, ISBN: 979-8-4007-0315-7.
Abstract | Links | BibTeX | Tags: Computer graphics, Computer Vision and Pattern Recognition, Physical Simulations
@inproceedings{maggioli_physically-inspired_2023,
title = {A Physically-inspired Approach to the Simulation of Plant Wilting},
author = {Filippo Maggioli and Jonathan Klein and Torsten Hädrich and Emanuele Rodolà and Wojtek Pałubicki and Sören Pirk and Dominik L. Michels},
url = {https://dl.acm.org/doi/10.1145/3610548.3618218},
doi = {10.1145/3610548.3618218},
isbn = {979-8-4007-0315-7},
year = {2023},
date = {2023-12-01},
urldate = {2025-03-30},
booktitle = {SIGGRAPH Asia 2023 Conference Papers},
pages = {1–8},
publisher = {ACM},
address = {Sydney NSW Australia},
abstract = {Plants are among the most complex objects to be modeled in computer graphics. While a large body of work is concerned with structural modeling and the dynamic reaction to external forces, our work focuses on the dynamic deformation caused by plant internal wilting processes. To this end, we motivate the simulation of water transport inside the plant, which is a key driver of the wilting process. We then map the change of water content in individual plant parts to branch stiffness values and obtain the wilted plant shape through a position based dynamics simulation. We show that our approach can recreate measured wilting processes and does so with a higher fidelity than approaches ignoring the internal water flow. Realistic plant wilting is not only important in a computer graphics context but can also aid the development of machine learning algorithms in agricultural applications through the generation of synthetic training data.},
keywords = {Computer graphics, Computer Vision and Pattern Recognition, Physical Simulations},
pubstate = {published},
tppubtype = {inproceedings}
}
Mennella, Ciro; Maniscalco, Umberto; Pietro, Giuseppe De; Esposito, Massimo
A deep learning system to monitor and assess rehabilitation exercises in home-based remote and unsupervised conditions Journal Article
In: Computers in Biology and Medicine, vol. 166, pp. 107485, 2023, ISSN: 0010-4825.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Movement classification, Pose estimation, Rehabilitation
@article{mennella_deep_2023,
title = {A deep learning system to monitor and assess rehabilitation exercises in home-based remote and unsupervised conditions},
author = {Ciro Mennella and Umberto Maniscalco and Giuseppe De Pietro and Massimo Esposito},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0010482523009502},
doi = {10.1016/j.compbiomed.2023.107485},
issn = {0010-4825},
year = {2023},
date = {2023-11-01},
urldate = {2024-07-21},
journal = {Computers in Biology and Medicine},
volume = {166},
pages = {107485},
abstract = {In the domain of physical rehabilitation, the progress in machine learning and the availability of cost-effective motion capture technologies have paved the way for innovative systems capable of capturing human movements, automatically analyzing recorded data, and evaluating movement quality.
This study introduces a novel, economically viable system designed for monitoring and assessing rehabilitation exercises. The system enables real-time evaluation of exercises, providing precise insights into deviations from correct execution. The evaluation comprises two significant components: range of motion (ROM) classification and compensatory pattern recognition. To develop and validate the effectiveness of the system, a unique dataset of 6 resistance training exercises was acquired.
The proposed system demonstrated impressive capabilities in motion monitoring and evaluation. Notably, we achieved promising results, with mean accuracies of 89% for evaluating ROM-class and 98% for classifying compensatory patterns.
By complementing conventional rehabilitation assessments conducted by skilled clinicians, this cutting-edge system has the potential to significantly improve rehabilitation practices. Additionally, its integration in home-based rehabilitation programs can greatly enhance patient outcomes and increase access to high-quality care.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Movement classification, Pose estimation, Rehabilitation},
pubstate = {published},
tppubtype = {article}
}
Dipanda, Albert; Gallo, Luigi; Yetongnon, Kokou (Ed.)
2023 17th International Conference on Signal-Image Technology & Internet-Based Systems (SITIS) Book
IEEE Computer Society, 2023, ISBN: 979-8-3503-7091-1.
Abstract | Links | BibTeX | Tags: Computer Vision and Pattern Recognition, Image Processing
@book{dipanda_2023_2023,
title = {2023 17th International Conference on Signal-Image Technology & Internet-Based Systems (SITIS)},
editor = {Albert Dipanda and Luigi Gallo and Kokou Yetongnon},
url = {https://ieeexplore.ieee.org/servlet/opac?punumber=10472709},
isbn = {979-8-3503-7091-1},
year = {2023},
date = {2023-11-01},
publisher = {IEEE Computer Society},
abstract = {We are pleased to welcome you to SITIS 2023, the seventeenth edition of the IEEE International Conference on Signal-Image Technology & Internet-Based Systems. We thank the authors for their valuable contributions to the conference. SITIS 2023 aims to bring together researchers from the major communities of signal/image processing and information modeling and analysis, and to foster cross-disciplinary collaborations. The conference consists of two tracks: SIVT (Signal & Image and Vision Technology), which focuses on recent developments and evolutions in signal processing, image analysis, vision, coding & authentication, and retrieval techniques; and ISSA (Intelligent Systems Services and Applications), which covers emerging concepts, architectures, protocols, and methodologies for data management on the Web and the Internet of Things technologies that connect unlimited numbers of smart objects. In addition to these tracks, SITIS 2023 also features some workshops that address a wide range of related but more specific topics.},
keywords = {Computer Vision and Pattern Recognition, Image Processing},
pubstate = {published},
tppubtype = {book}
}
Generosi, Andrea; Agostinelli, Thomas; Mengoni, Maura
Smart retrofitting for human factors: a face recognition-based system proposal Journal Article
In: International Journal on Interactive Design and Manufacturing (IJIDeM), vol. 17, no. 1, pp. 421–433, 2023, ISSN: 1955-2505.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Human-Centered Design, Industry 4.0, Machine Learning
@article{generosi_smart_2023,
title = {Smart retrofitting for human factors: a face recognition-based system proposal},
author = {Andrea Generosi and Thomas Agostinelli and Maura Mengoni},
url = {https://doi.org/10.1007/s12008-022-01035-4},
doi = {10.1007/s12008-022-01035-4},
issn = {1955-2505},
year = {2023},
date = {2023-02-01},
urldate = {2024-12-28},
journal = {International Journal on Interactive Design and Manufacturing (IJIDeM)},
volume = {17},
number = {1},
pages = {421–433},
abstract = {Industry nowadays must deal with the so-called “fourth industrial revolution”, i.e. Industry 4.0. This revolution is based on the introduction of new paradigms in the manufacturing industry such as flexibility, efficiency, safety, digitization, big data analysis and interconnection. However, human factors’ integration is usually not considered, although included as one of the paradigms. Some of these human factors’ most overlooked aspects are the customization of the worker’s user experience and on-board safety. Moreover, the issue of integrating state-of-the-art technologies on legacy machines is also of utmost importance, as it can make a considerable difference in the economic and environmental aspects of their management, by extending the machine’s life cycle. In response to this issue, the Retrofitting paradigm, the addition of new technologies to legacy machines, has been considered. In this paper we propose a novel modular system architecture for secure authentication and worker’s log-in/log-out traceability based on face recognition and on state-of-the-art Deep Learning and Computer Vision techniques, such as Convolutional Neural Networks. Starting from the proposed architecture, we developed and tested a device designed to retrofit legacy machines with such capabilities, paying particular attention to interface usability in the design phase, an aspect little considered in retrofitting applications along with other Human Factors, despite being one of the pillars of Industry 4.0. This research work’s results showed a dramatic improvement in machines’ on-board access safety.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Human-Centered Design, Industry 4.0, Machine Learning},
pubstate = {published},
tppubtype = {article}
}
Palloni, Lorenzo; Galteri, Leonardo; Bertini, Marco
Optimization Techniques of Deep Learning Models for Visual Quality Improvement Book Section
In: New Trends in Intelligent Software Methodologies, Tools and Techniques, pp. 173–184, IOS Press, 2023.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Generative Adversarial Networks, Machine Learning, Network Quantization, Real-time Video Processing, Super-Resolution, Video Streaming
@incollection{palloniOptimizationTechniquesDeep2023,
title = {Optimization Techniques of Deep Learning Models for Visual Quality Improvement},
author = {Lorenzo Palloni and Leonardo Galteri and Marco Bertini},
url = {https://ebooks.iospress.nl/doi/10.3233/FAIA230233},
year = {2023},
date = {2023-01-01},
booktitle = {New Trends in Intelligent Software Methodologies, Tools and Techniques},
pages = {173–184},
publisher = {IOS Press},
abstract = {Video restoration is a widely studied task in the field of computer vision and image processing. The primary objective of video restoration is to improve the visual quality of degraded videos caused by various factors, such as noise, blur, compression artifacts, and other distortions. In this study, the integration of post-training quantization techniques was investigated to optimize deep learning models for super-resolution inference. The results indicate that reducing the precision of weights and activations in these models substantially decreases the computational complexity and memory requirements without compromising performance, rendering them more practical and cost-effective for real-world applications, where real-time inference is often required. When TensorRT was integrated with PyTorch, the efficiency of the model was further improved taking advantage of the INT8 computational capabilities of recent NVIDIA GPUs.},
keywords = {Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Generative Adversarial Networks, Machine Learning, Network Quantization, Real-time Video Processing, Super-Resolution, Video Streaming},
pubstate = {published},
tppubtype = {incollection}
}
Ferrari, Claudio; Becattini, Federico; Galteri, Leonardo; Bimbo, Alberto Del
(Compress and Restore)^N: A robust defense against adversarial attacks on image classification Journal Article
In: ACM Transactions on Multimedia Computing, Communications and Applications, vol. 19, no. 1s, pp. 1–16, 2023, ISSN: 1551-6857.
Abstract | Links | BibTeX | Tags: Adversarial Attacks, Adversarial Defense Mechanisms, Artificial Intelligence, Computer Vision and Pattern Recognition, Gradient Obfuscation, Image Classification, Image Restoration, Robustness in AI Models
@article{ferrariCompressRestoreRobust2023,
title = {(Compress and Restore)^N: A robust defense against adversarial attacks on image classification},
author = {Claudio Ferrari and Federico Becattini and Leonardo Galteri and Alberto Del Bimbo},
url = {https://dl.acm.org/doi/pdf/10.1145/3524619},
doi = {10.1145/3524619},
year = {2023},
date = {2023-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications and Applications},
volume = {19},
number = {1s},
pages = {1–16},
abstract = {Modern image classification approaches often rely on deep neural networks, which have shown pronounced weakness to adversarial examples: images corrupted with specifically designed yet imperceptible noise that causes the network to misclassify. In this article, we propose a conceptually simple yet robust solution to tackle adversarial attacks on image classification. Our defense works by first applying a JPEG compression with a random quality factor; compression artifacts are subsequently removed by means of a generative model (Artifact Restoration GAN). The process can be iterated ensuring the image is not degraded and hence the classification not compromised. We train different AR-GANs for different compression factors, so that we can change their parameters dynamically at each iteration depending on the current compression, making the gradient approximation difficult. We experiment with our defense against three white-box and two black-box attacks, with a particular focus on the state-of-the-art BPDA attack. Our method does not require any adversarial training, and is independent of both the classifier and the attack. Experiments demonstrate that dynamically changing the AR-GAN parameters is of fundamental importance to obtain significant robustness.},
issn = {1551-6857},
keywords = {Adversarial Attacks, Adversarial Defense Mechanisms, Artificial Intelligence, Computer Vision and Pattern Recognition, Gradient Obfuscation, Image Classification, Image Restoration, Robustness in AI Models},
pubstate = {published},
tppubtype = {article}
}
Fontanini, Tomaso; Ferrari, Claudio; Lisanti, Giuseppe; Galteri, Leonardo; Berretti, Stefano; Bertozzi, Massimo; Prati, Andrea
FrankenMask: Manipulating semantic masks with transformers for face parts editing Journal Article
In: Pattern Recognition Letters, vol. 176, pp. 14–20, 2023, ISSN: 0167-8655.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Face Editing, Generative Adversarial Networks, Image Processing, Transformer Networks
@article{fontaniniFrankenMaskManipulatingSemantic2023,
title = {FrankenMask: Manipulating semantic masks with transformers for face parts editing},
author = {Tomaso Fontanini and Claudio Ferrari and Giuseppe Lisanti and Leonardo Galteri and Stefano Berretti and Massimo Bertozzi and Andrea Prati},
url = {https://www.sciencedirect.com/science/article/pii/S0167865523002829},
doi = {10.1016/j.patrec.2023.10.010},
year = {2023},
date = {2023-01-01},
journal = {Pattern Recognition Letters},
volume = {176},
pages = {14–20},
abstract = {In this paper, we propose FrankenMask, a novel framework that allows swapping and rearranging face parts in semantic masks for automatic editing of shape-related facial attributes. This is a novel yet challenging task as substituting face parts in a semantic mask requires to account for possible spatial misalignment and the adaptation of surrounding regions. We obtain such a feature by combining a Transformer encoder to learn the spatial relationships of facial parts, with an encoder–decoder architecture, which reconstructs a complete mask from the composition of local parts. Reconstruction and attribute classification results demonstrate the effective synthesis of facial images, while showing the generation of accurate and plausible facial attributes. Code is available at https://github.com/TFonta/FrankenMask_semantic.},
issn = {0167-8655},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Face Editing, Generative Adversarial Networks, Image Processing, Transformer Networks},
pubstate = {published},
tppubtype = {article}
}
Agnolucci, Lorenzo; Galteri, Leonardo; Bertini, Marco; Bimbo, Alberto Del
Perceptual quality improvement in videoconferencing using keyframes-based GAN Journal Article
In: IEEE Transactions on Multimedia, vol. 26, pp. 339–352, 2023, ISSN: 1520-9210.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Face Restoration, Generative Adversarial Networks, Real-time Video Processing, Video Conferencing, Video Streaming
@article{agnolucciPerceptualQualityImprovement2023,
title = {Perceptual quality improvement in videoconferencing using keyframes-based GAN},
author = {Lorenzo Agnolucci and Leonardo Galteri and Marco Bertini and Alberto Del Bimbo},
url = {https://ieeexplore.ieee.org/abstract/document/10093128},
doi = {10.1109/TMM.2023.3264882},
year = {2023},
date = {2023-01-01},
journal = {IEEE Transactions on Multimedia},
volume = {26},
pages = {339–352},
abstract = {In the latest years, videoconferencing has taken a fundamental role in interpersonal relations, both for personal and business purposes. Lossy video compression algorithms are the enabling technology for videoconferencing, as they reduce the bandwidth required for real-time video streaming. However, lossy video compression decreases the perceived visual quality. Thus, many techniques for reducing compression artifacts and improving video visual quality have been proposed in recent years. In this work, we propose a novel GAN-based method for compression artifacts reduction in videoconferencing. Given that, in this context, the speaker is typically in front of the camera and remains the same for the entire duration of the transmission, we can maintain a set of reference keyframes of the person from the higher-quality I-frames that are transmitted within the video stream and exploit them to guide the visual quality improvement; a novel aspect of this approach is the update policy that maintains and updates a compact and effective set of reference keyframes. First, we extract multi-scale features from the compressed and reference frames. Then, our architecture combines these features in a progressive manner according to facial landmarks. This allows the restoration of the high-frequency details lost after the video compression. Experiments show that the proposed approach improves visual quality and generates photo-realistic results even with high compression rates.},
issn = {1520-9210},
keywords = {Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Face Restoration, Generative Adversarial Networks, Real-time Video Processing, Video Conferencing, Video Streaming},
pubstate = {published},
tppubtype = {article}
}
2022
Generosi, Andrea; Agostinelli, Thomas; Ceccacci, Silvia; Mengoni, Maura
A novel platform to enable the future human-centered factory Journal Article
In: The International Journal of Advanced Manufacturing Technology, vol. 122, no. 11-12, pp. 4221–4233, 2022, ISSN: 0268-3768, 1433-3015.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Human-Centered Design
@article{generosi_novel_2022,
title = {A novel platform to enable the future human-centered factory},
author = {Andrea Generosi and Thomas Agostinelli and Silvia Ceccacci and Maura Mengoni},
url = {https://link.springer.com/10.1007/s00170-022-09880-z},
doi = {10.1007/s00170-022-09880-z},
issn = {0268-3768, 1433-3015},
year = {2022},
date = {2022-10-01},
urldate = {2024-12-28},
journal = {The International Journal of Advanced Manufacturing Technology},
volume = {122},
number = {11-12},
pages = {4221–4233},
abstract = {This paper introduces a web-platform system that performs semi-automatic computation of several risk indexes, based on the considered evaluation method (e.g., RULA—Rapid Upper Limb Assessment, REBA—Rapid Entire Body Assessment, OCRA—OCcupational Repetitive Action) to support ergonomics risk estimation, and provides augmented analytics to proactively improve ergonomic risk monitoring based on the characteristics of workers (e.g., age, gender), working tasks, and environment. It implements a marker-less and low-cost body detection system, based on the use of RGB cameras, which exploits the open-source deep learning model CMU (Carnegie Mellon University), from the tf-pose-estimation project, assuring worker privacy and data protection, and which has already been successfully assessed in standard laboratory conditions. The paper provides a full description of the proposed platform and reports the results of validation in a real industrial case study regarding a washing machine assembly line composed of 5 workstations. A total of 15 workers have been involved. Results suggest that the proposed system is able to significantly speed up the ergonomic assessment, to predict angles, and to perform a RULA and OCRA analysis with an accuracy comparable to that obtainable from a manual analysis, even under the unpredictable conditions that can be found in a real working environment.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Human-Centered Design},
pubstate = {published},
tppubtype = {article}
}
Barbareschi, Mario; Barone, Salvatore; Bosio, Alberto; Han, Jie; Traiola, Marcello
A Genetic-algorithm-based Approach to the Design of DCT Hardware Accelerators Journal Article
In: ACM Journal on Emerging Technologies in Computing Systems, vol. 18, no. 3, pp. 1–25, 2022, ISSN: 1550-4832, 1550-4840.
Abstract | Links | BibTeX | Tags: Image Processing, Low power approximate circuits
@article{barbareschi_genetic-algorithm-based_2022,
title = {A Genetic-algorithm-based Approach to the Design of DCT Hardware Accelerators},
author = {Mario Barbareschi and Salvatore Barone and Alberto Bosio and Jie Han and Marcello Traiola},
doi = {10.1145/3501772},
issn = {1550-4832, 1550-4840},
year = {2022},
date = {2022-07-01},
urldate = {2022-04-25},
journal = {ACM Journal on Emerging Technologies in Computing Systems},
volume = {18},
number = {3},
pages = {1–25},
abstract = {As modern applications demand an unprecedented level of computational resources, traditional computing system design paradigms are no longer adequate to guarantee significant performance enhancement at an affordable cost. Approximate Computing (AxC) has been introduced as a potential candidate to achieve better computational performances by relaxing non-critical functional system specifications. In this article, we propose a systematic and high-abstraction-level approach allowing the automatic generation of near Pareto-optimal approximate configurations for a Discrete Cosine Transform (DCT) hardware accelerator. We obtain the approximate variants by using approximate operations, having configurable approximation degree, rather than full-precise ones. We use a genetic searching algorithm to find the appropriate tuning of the approximation degree, leading to optimal tradeoffs between accuracy and gains. Finally, to evaluate the actual HW gains, we synthesize non-dominated approximate DCT variants for two different target technologies, namely, Field Programmable Gate Arrays (FPGAs) and Application Specific Integrated Circuits (ASICs). Experimental results show that the proposed approach allows performing a meaningful exploration of the design space to find the best tradeoffs in a reasonable time. Indeed, compared to the state-of-the-art work on approximate DCT, the proposed approach allows an 18% average energy improvement while at the same time improving image quality.},
keywords = {Image Processing, Low power approximate circuits},
pubstate = {published},
tppubtype = {article}
}
Moschella, Luca; Melzi, Simone; Cosmo, Luca; Maggioli, Filippo; Litany, Or; Ovsjanikov, Maks; Guibas, Leonidas; Rodolà, Emanuele
Learning Spectral Unions of Partial Deformable 3D Shapes Journal Article
In: Computer Graphics Forum, vol. 41, no. 2, pp. 407–417, 2022, ISSN: 0167-7055, 1467-8659.
Abstract | Links | BibTeX | Tags: Computer graphics, Computer Vision and Pattern Recognition, Geometry Processing, Shape Analysis, Shape Matching, Spectral Geometry
@article{moschella_learning_2022,
title = {Learning Spectral Unions of Partial Deformable 3D Shapes},
author = {Luca Moschella and Simone Melzi and Luca Cosmo and Filippo Maggioli and Or Litany and Maks Ovsjanikov and Leonidas Guibas and Emanuele Rodolà},
url = {https://onlinelibrary.wiley.com/doi/10.1111/cgf.14483},
doi = {10.1111/cgf.14483},
issn = {0167-7055, 1467-8659},
year = {2022},
date = {2022-05-01},
urldate = {2023-06-28},
journal = {Computer Graphics Forum},
volume = {41},
number = {2},
pages = {407–417},
abstract = {Spectral geometric methods have brought revolutionary changes to the field of geometry processing. Of particular interest is the study of the Laplacian spectrum as a compact, isometry and permutation-invariant representation of a shape. Some recent works show how the intrinsic geometry of a full shape can be recovered from its spectrum, but few approaches consider the more challenging problem of recovering the geometry from the spectral information of partial shapes. In this paper, we propose a possible way to fill this gap. We introduce a learning-based method to estimate the Laplacian spectrum of the union of partial non-rigid 3D shapes, without actually computing the 3D geometry of the union or any correspondence between those partial shapes. We do so by operating purely in the spectral domain and by defining the union operation between short sequences of eigenvalues. We show that the approximated union spectrum can be used as-is to reconstruct the complete geometry [MRC*19], perform region localization on a template [RTO*19] and retrieve shapes from a database, generalizing ShapeDNA [RWP06] to work with partialities. Working with eigenvalues allows us to deal with unknown correspondence, different sampling, and different discretizations (point clouds and meshes alike), making this operation especially robust and general. Our approach is data-driven and can generalize to isometric and non-isometric deformations of the surface, as long as these stay within the same semantic class (e.g., human bodies or horses), as well as to partiality artifacts not seen at training time.},
keywords = {Computer graphics, Computer Vision and Pattern Recognition, Geometry Processing, Shape Analysis, Shape Matching, Spectral Geometry},
pubstate = {published},
tppubtype = {article}
}
Galteri, Leonardo; Seidenari, Lorenzo; Bongini, Pietro; Bertini, Marco; Bimbo, Alberto Del
LANBIQUE: LANguage-based Blind Image QUality Evaluation Journal Article
In: ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2022.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Image Captioning, Image Processing, Quality Assessment, Vision and Language
@article{galteriLANBIQUELANguagebasedBlind2022,
title = {LANBIQUE: LANguage-based Blind Image QUality Evaluation},
author = {Leonardo Galteri and Lorenzo Seidenari and Pietro Bongini and Marco Bertini and Alberto Del Bimbo},
url = {https://dl.acm.org/doi/full/10.1145/3538649},
doi = {10.1145/3538649},
year = {2022},
date = {2022-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
abstract = {Image quality assessment is often performed with deep networks that are fine-tuned to regress a human provided quality score of a given image. Usually, this approach may lack generalization capabilities and, while being highly precise on similar image distributions, it may yield lower correlation on unseen distortions. In particular, such networks show poor performances when images corrupted by noise, blur, or compression have been restored by generative models. As a matter of fact, evaluation of these generative models is often performed providing anecdotal results to the reader. In the case of image enhancement and restoration, reference images are usually available. Nevertheless, using signal based metrics often leads to counterintuitive results: Highly natural crisp images may obtain worse scores than blurry ones. However, blind reference image assessment may rank images reconstructed with GANs higher than the original undistorted images. To avoid time-consuming human-based image assessment, semantic computer vision tasks may be exploited instead. In this article, we advocate the use of language generation tasks to evaluate the quality of restored images. We refer to our assessment approach as LANguage-based Blind Image QUality Evaluation (LANBIQUE). We show experimentally that image captioning, used as a downstream task, may serve as a method to score image quality, independently of the distortion process that affects the data. Captioning scores are better aligned with human rankings with respect to classic signal based or No-reference image quality metrics. We show insights on how the corruption, by artefacts, of local image structure may steer image captions in the wrong direction.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Image Captioning, Image Processing, Quality Assessment, Vision and Language},
pubstate = {published},
tppubtype = {article}
}
Agnolucci, Lorenzo; Galteri, Leonardo; Bertini, Marco; Bimbo, Alberto Del
Restoration of analog videos using Swin-UNet Proceedings Article
In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 6985–6987, 2022.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Digital Archiving, Image Processing, Transformer Networks
@inproceedings{agnolucciRestorationAnalogVideos2022,
title = {Restoration of analog videos using Swin-UNet},
author = {Lorenzo Agnolucci and Leonardo Galteri and Marco Bertini and Alberto Del Bimbo},
url = {https://dl.acm.org/doi/abs/10.1145/3503161.3547730},
doi = {10.1145/3503161.3547730},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 30th ACM International Conference on Multimedia},
pages = {6985–6987},
abstract = {In this paper we present a system to restore analog videos from historical archives. These videos often contain severe visual degradation due to the deterioration of their tape supports, which requires costly and slow manual interventions to recover the original content. The proposed system uses a multi-frame approach and is also able to deal with severe tape mistracking, which results in completely scrambled frames. Tests on real-world videos from a major historical video archive show the effectiveness of our demo system.},
keywords = {Artificial Intelligence, Compression Artifact Removal, Computer Vision and Pattern Recognition, Digital Archiving, Image Processing, Transformer Networks},
pubstate = {published},
tppubtype = {inproceedings}
}
Bertini, Marco; Galteri, Leonardo; Seidenari, Lorenzo; Uricchio, Tiberio; Del Bimbo, Alberto
Fast and effective AI approaches for video quality improvement Proceedings Article
In: Proceedings of the 1st Mile-High Video Conference, pp. 77–78, 2022.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Compression Artifact Removal, Generative Adversarial Networks, Image Processing, Image Synthesis and Enhancement, Real-time Video Processing
@inproceedings{bertiniFastEffectiveAI2022,
title = {Fast and effective AI approaches for video quality improvement},
author = {Marco Bertini and Leonardo Galteri and Lorenzo Seidenari and Tiberio Uricchio and Alberto Del Bimbo},
url = {https://dl.acm.org/doi/abs/10.1145/3510450.3517270},
doi = {10.1145/3510450.3517270},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 1st Mile-High Video Conference},
pages = {77–78},
abstract = {In this work we present AI-based solutions to the problem of real-time video quality improvement, addressing both video super resolution and compression artefact removal. These solutions can be used to revamp video archive materials, allowing their reuse in modern video production, and to improve the end-user experience by streaming videos at higher quality while requiring less bandwidth for their transmission. The proposed approaches can be used on a variety of devices as a post-processing step, without requiring any change to existing video encoding and transmission pipelines. Experiments on standard video datasets show that the proposed approaches improve video quality metrics under either fixed bandwidth budgets or fixed quality goals.},
keywords = {Artificial Intelligence, Compression Artifact Removal, Generative Adversarial Networks, Image Processing, Image Synthesis and Enhancement, Real-time Video Processing},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Agostinelli, Thomas; Generosi, Andrea; Ceccacci, Silvia; Khamaisi, Riccardo Karim; Peruzzini, Margherita; Mengoni, Maura
Preliminary Validation of a Low-Cost Motion Analysis System Based on RGB Cameras to Support the Evaluation of Postural Risk Assessment Journal Article
In: Applied Sciences, vol. 11, no. 22, pp. 10645, 2021, ISSN: 2076-3417.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Industry 4.0, Real-time Video Processing
@article{agostinelli_preliminary_2021,
title = {Preliminary Validation of a Low-Cost Motion Analysis System Based on RGB Cameras to Support the Evaluation of Postural Risk Assessment},
author = {Thomas Agostinelli and Andrea Generosi and Silvia Ceccacci and Riccardo Karim Khamaisi and Margherita Peruzzini and Maura Mengoni},
url = {https://www.mdpi.com/2076-3417/11/22/10645},
doi = {10.3390/app112210645},
issn = {2076-3417},
year = {2021},
date = {2021-11-01},
urldate = {2024-12-28},
journal = {Applied Sciences},
volume = {11},
number = {22},
pages = {10645},
abstract = {This paper introduces a low-cost, computationally lightweight, marker-less motion capture system based on the acquisition of frame images through standard RGB cameras. It exploits the open-source deep learning model CMU from the tf-pose-estimation project. Its numerical accuracy and its usefulness for ergonomic assessment are evaluated through an experiment designed and performed to: (1) compare the data it provides with those collected from a gold-standard motion capture system; (2) compare the RULA scores obtained with its data against those obtained with data provided by the Vicon Nexus system and those estimated through video analysis by a team of three expert ergonomists. Tests were conducted in standardized laboratory conditions and involved a total of six subjects. Results suggest that the proposed system can predict angles with good consistency and give evidence of the tool's usefulness for ergonomists.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Deep Learning, Industry 4.0, Real-time Video Processing},
pubstate = {published},
tppubtype = {article}
}
Seidenari, Lorenzo; Galteri, Leonardo; Bongini, Pietro; Bertini, Marco; Del Bimbo, Alberto
Language based image quality assessment Proceedings Article
In: Proceedings of the 3rd ACM International Conference on Multimedia in Asia, pp. 1–7, 2021.
Abstract | Links | BibTeX | Tags: Artificial Intelligence, Computer Vision and Pattern Recognition, Image Captioning, Image Processing, Quality Assessment, Vision and Language
@inproceedings{seidenariLanguageBasedImage2021,
title = {Language based image quality assessment},
author = {Lorenzo Seidenari and Leonardo Galteri and Pietro Bongini and Marco Bertini and Alberto Del Bimbo},
url = {https://dl.acm.org/doi/abs/10.1145/3469877.3490605},
doi = {10.1145/3469877.3490605},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 3rd ACM International Conference on Multimedia in Asia},
pages = {1–7},
abstract = {Evaluation of generative models in the visual domain is often performed by providing anecdotal results to the reader. In the case of image enhancement, reference images are usually available. Nonetheless, using signal-based metrics often leads to counterintuitive results: highly natural, crisp images may obtain worse scores than blurry ones. On the other hand, blind reference image assessment may rank images reconstructed with GANs higher than the original undistorted images. To avoid time-consuming human-based image assessment, semantic computer vision tasks may be exploited instead [9, 25, 33]. In this paper we advocate the use of language generation tasks to evaluate the quality of restored images. We show experimentally that image captioning, used as a downstream task, may serve as a method to score image quality. Captioning scores are better aligned with human rankings than signal-based metrics or no-reference image quality metrics. We show insights on how the corruption of local image structure by artifacts may steer image captions in the wrong direction.},
keywords = {Artificial Intelligence, Computer Vision and Pattern Recognition, Image Captioning, Image Processing, Quality Assessment, Vision and Language},
pubstate = {published},
tppubtype = {inproceedings}
}