@unpublished{arda2026croissantminer,
  author     = {Arda, Berke and Akhtar, Mubashara and Yavuz, Ahmetcan and Gerry, Paul and Lobentanzer, Sebastian and Sarwar, Nobin and Giner-Miguelez, Joan and Chen, Kongtao and Zhang, Luyao and Sachan, Mrinmaya},
  title      = {{CroissantMiner}: Automated Extraction and Validation of {Croissant} Metadata for {ML} Datasets},
  year       = {2026},
  note       = {Coming soon},
  dimensions = {true},
  demo       = {https://huggingface.co/spaces/bearda/croissantminer},
  dataset    = {https://huggingface.co/datasets/croissantminer/croissantminer},
}
Preprint’26
Croissant Baker: Metadata Generation for Discoverable, Governable, and Reusable ML Datasets
Rafi Al Attrach, Rajna Fani, Sebastian Lobentanzer, Joan Giner-Miguelez, Debanshu Das, Varuni H. K., Nobin Sarwar, and 13 more authors
@article{attrach2026croissantbaker,
  author        = {Al Attrach, Rafi and Fani, Rajna and Lobentanzer, Sebastian and Giner-Miguelez, Joan and Das, Debanshu and H. K., Varuni and Sarwar, Nobin and Ghosh, Rajat and Archit, Anwai and Motghare, Surbhi and Parry, Christina Conrad and Oala, Luis and Grosso, Lara and Vanschoren, Joaquin and Vogler, Steffen and Goswami, Sujata and Rosenthal, Eric S. and Ghassemi, Marzyeh and McDermott, Matthew and Pollard, Tom},
  title         = {{Croissant} {Baker}: Metadata Generation for Discoverable, Governable, and Reusable {ML} Datasets},
  journal       = {arXiv preprint},
  year          = {2026},
  eprint        = {2605.15079},
  archiveprefix = {arXiv},
  dimensions    = {true},
}
ACL’26
Multimodal Unlearning Across Vision, Language, Video, and Audio: Survey of Methods, Datasets, and Benchmarks
@inproceedings{sarwar2026mm-unlearning-survey,
  author     = {Sarwar, Nobin and {Roy Dipta}, Shubhashis and Liu, Zheyuan and Patil, Vaidehi},
  title      = {Multimodal Unlearning Across Vision, Language, Video, and Audio: Survey of Methods, Datasets, and Benchmarks},
  booktitle  = {Findings of {ACL}},
  year       = {2026},
  month      = jul,
  address    = {San Diego, California, USA},
  publisher  = {Association for Computational Linguistics},
  techrxiv   = {http://dx.doi.org/10.36227/techrxiv.176945748.88280394/v1},
  dimensions = {true},
}
2025
NeurIPS-W’25
FedMentor: Domain-Aware Differential Privacy for Heterogeneous Federated LLMs in Mental Health