@article{shindo2023alphailp,
Anote={./images/shindo2023alphailp.png},
author={Hikaru Shindo and Viktor Pfanschilling and Devendra Singh Dhami and Kristian Kersting},
note = {Deep neural learning has shown remarkable performance at learning representations for visual object categorization. However, deep neural networks such as CNNs do not explicitly encode objects and relations among them. This limits their success on tasks that require a deep logical understanding of visual scenes, such as Kandinsky patterns and Bongard problems. To overcome these limitations, we introduce 𝛼ILP, a novel differentiable inductive logic programming framework that learns to represent scenes as logic programs—intuitively, logical atoms correspond to objects, attributes, and relations, and clauses encode high-level scene information. 𝛼ILP has an end-to-end reasoning architecture from visual inputs. Using it, 𝛼ILP performs differentiable inductive logic programming on complex visual scenes, i.e., the logical rules are learned by gradient descent. Our extensive experiments on Kandinsky patterns and CLEVR-Hans benchmarks demonstrate the accuracy and efficiency of 𝛼ILP in learning complex visual-logical concepts.},
title={alphaILP: Thinking Visual Scenes as Differentiable Logic Programs},
journal={Machine Learning Journal (MLJ)},
Publisher = {Springer},
year={2023},
Crossref = {https://ml-research.github.io/alphailpdoc/},
url={https://link.springer.com/article/10.1007/s10994-023-06320-1},
keywords={Neuro-Symbolic AI, Differentiable Reasoning, Inductive Logic Programming, Object-centric Learning}
}
@article{skyagin2023slash,
Anote = {./images/skyagin2023slash.png},
Author = {Arseny Skryagin and Daniel Ochs and Devendra Singh Dhami and Kristian Kersting},
Journal = {Journal of Artificial Intelligence Research (JAIR)},
Keywords = {Statistical Relational Learning, Neural ASP, Neuro-Symbolic AI, Probabilistic Circuits, Neural Networks, Neural Probabilistic Predicate},
Note = {The goal of combining the robustness of neural networks and the expressiveness of symbolic
methods has rekindled the interest in Neuro-Symbolic AI. Deep Probabilistic Programming
Languages (DPPLs) have been developed for probabilistic logic programming to be carried
out via the probability estimations of deep neural networks (DNNs). However, recent
SOTA DPPL approaches allow only for limited conditional probabilistic queries and do
not offer the power of true joint probability estimation. In our work, we propose an easy
integration of tractable probabilistic inference within a DPPL. To this end, we introduce
SLASH, a novel DPPL that consists of Neural-Probabilistic Predicates (NPPs) and a logic
program, united via answer set programming (ASP). NPPs are a novel design principle
allowing for combining all deep model types and combinations thereof to be represented
as a single probabilistic predicate. In this context, we introduce a novel +/− notation
for answering various types of probabilistic queries by adjusting the atom notations of a
predicate. To scale well, we show how to prune the stochastically insignificant parts of the
(ground) program, speeding up reasoning without sacrificing the predictive performance.
We evaluate SLASH on a variety of different tasks, including the benchmark task of MNIST
addition and Visual Question Answering (VQA).},
Title = {Scalable Neural-Probabilistic Answer Set Programming},
Url = {},
Year = {2023}}
@inproceedings{willig2023consolidate,
Anote={./images/willig2023consolidate.png},
author = {Moritz Willig and Matej Zečević and Devendra Singh Dhami and Kristian Kersting},
title = {Do Not Marginalize Mechanisms, Rather Consolidate!},
Keywords = {Causality, Marginalization, Consolidation, Interventions, Structural Causal Model, Simplification, Compression},
year = {2023},
Url = {},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)},
Note = {Structural causal models (SCMs) are a powerful tool for understanding the complex causal relationships that underlie many real-world systems. As these systems grow in size, the number of variables and the complexity of interactions between them grow, too, making the models convoluted and difficult to analyze. This is particularly true in the context of machine learning and artificial intelligence, where an ever-increasing amount of data demands new methods to simplify and compress large-scale SCMs. While methods for marginalizing and abstracting SCMs already exist today, they may destroy the causality of the marginalized model. To alleviate this, we introduce the concept of consolidating causal mechanisms to transform large-scale SCMs while preserving consistent interventional behaviour. We show consolidation is a powerful method for simplifying SCMs, discuss reduction of computational complexity and give a perspective on generalizing abilities of consolidated SCMs.}
}
@inproceedings{bellagente2023multifusion,
Anote={./images/bellagente2023multifusion.png},
author = {Marco Bellagente and Manuel Brack and Hannah Teufel and Felix Friedrich and Björn Deiseroth and Constantin Eichenberg and Andrew Dai and Robert Baldock and Souradeep Nanda and Koen Oostermeijer and Andres Felipe Cruz-Salinas and Patrick Schramowski and Kristian Kersting and Samuel Weinbach},
title = {MultiFusion: Fusing Pre-Trained Models for Multi-Lingual, Multi-Modal Image Generation},
year = {2023},
Url = {https://arxiv.org/abs/2305.15296},
Pages = {},
Note = {The recent popularity of text-to-image diffusion models (DM) can largely be attributed to the intuitive interface they provide to users. The intended generation can be expressed in natural language, with the model producing faithful interpretations of text prompts. However, expressing complex or nuanced ideas in text alone can be difficult. To ease image generation, we propose MultiFusion, which allows one to express complex and nuanced concepts with arbitrarily interleaved inputs of multiple modalities and languages. MultiFusion leverages pre-trained models and aligns them for integration into a cohesive system, thereby avoiding the need for extensive training from scratch. Our experimental results demonstrate the efficient transfer of capabilities from individual modules to the downstream model. Specifically, the fusion of all independent components allows the image generation module to utilize multilingual, interleaved multimodal inputs despite being trained solely on monomodal data in a single language.},
Keywords = {Image Synthesis, Image Generation, Diffusion, Multimodality, Multilingualism},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)}
}
@inproceedings{deiseroth2023atman,
Anote={./images/deb2023atman.png},
author = {Björn Deiseroth and Mayukh Deb and Samuel Weinbach and Manuel Brack and Patrick Schramowski and Kristian Kersting},
title = {AtMan: Understanding Transformer Predictions Through Memory Efficient Attention Manipulation},
Keywords = {Explainable AI, Transformer, Large Language Models, Multimodal, Computer Vision},
year = {2023},
Url = {https://arxiv.org/abs/2301.08110},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)},
Note= {Generative transformer models have become increasingly complex, with large numbers of parameters and the ability to process multiple input modalities. Current methods for explaining their predictions are resource-intensive. Most crucially, they require prohibitively large amounts of additional memory since they rely on backpropagation which allocates almost twice as much GPU memory as the forward pass. This renders it difficult, if not impossible, to use explanations in production. We present AtMan that provides explanations of generative transformer models at almost no extra cost. Specifically, AtMan is a modality-agnostic perturbation method that manipulates the attention mechanisms of transformers to produce relevance maps for the input with respect to the output prediction. Instead of using backpropagation, AtMan applies a parallelizable token-based search method relying on cosine similarity neighborhood in the embedding space. Our exhaustive experiments on text and image-text benchmarks demonstrate that AtMan outperforms current state-of-the-art gradient-based methods on several metrics and models while being computationally efficient. As such, AtMan is suitable for use in large model inference deployments.}
}
@inproceedings{brack2023sega,
Anote = {./images/sega_graphic.png},
title={SEGA: Instructing Text-to-Image Models using Semantic Guidance},
author={Manuel Brack and Felix Friedrich and Dominik Hintersdorf and Lukas Struppek and Patrick Schramowski and Kristian Kersting},
year = {2023},
month={Dec},
Note = {Text-to-image diffusion models have recently received a lot of interest for their astonishing ability to produce high-fidelity images from text only. However, achieving one-shot generation that aligns with the user’s intent is nearly impossible, yet small changes to the input prompt often result in very different images. This leaves the user with little semantic control. To put the user in control, we show how to interact with the diffusion process to flexibly steer it along semantic directions. This semantic guidance (SEGA) generalizes to any generative architecture using classifier-free guidance. More importantly, it allows for subtle and extensive edits, composition and style changes, and optimizing the overall artistic conception. We demonstrate SEGA’s effectiveness on both latent and pixel-based diffusion models such as Stable Diffusion, Paella, and DeepFloyd-IF using a variety of tasks, thus providing strong evidence for its versatility and flexibility.},
Pages = {},
Keywords = {Representations, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, Concepts, Semantics},
Url={https://arxiv.org/abs/2211.05105},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)}
}
@inproceedings{yu2023neurips_cc,
Anote = {./images/yu2023tpm_cc.png},
Author = {Zhongjie Yu and Martin Trapp and Kristian Kersting},
title = {Characteristic Circuit},
Keywords = {Characteristic Circuit, Characteristic Function, Probabilistic Circuit, Heterogeneous Data, Density Estimation, Mixed Models, Hybrid Data},
year = {2023},
Url = {},
Pages = {},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)},
Note= {In many real-world scenarios it is crucial to be able to reliably and efficiently reason under uncertainty while capturing complex relationships in data. Probabilistic circuits (PCs), a prominent family of tractable probabilistic models, offer a remedy to this challenge by composing simple, tractable distributions into a high-dimensional probability distribution. However, learning PCs on heterogeneous data is challenging and densities of some parametric distributions are not available in closed form, limiting their potential use. We introduce characteristic circuits (CCs), a family of tractable probabilistic models providing a unified formalization of distributions over heterogeneous data in the spectral domain. The one-to-one relationship between characteristic functions and probability measures enables us to learn high-dimensional distributions on heterogeneous data domains and facilitates efficient probabilistic inference even when no closed-form density function is available. We show that the structure and parameters of CCs can be learned efficiently from the data and find that CCs outperform state-of-the-art density estimators for heterogeneous data domains on common benchmark data sets.}
}
@inproceedings{delfosse2023nudge,
Anote={./images/delfosse2023nudge.png},
author = {Quentin Delfosse and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
title = {Interpretable and Explainable Logical Policies via Neurally Guided Symbolic Abstraction},
Keywords = {Reinforcement Learning, Neuro-Symbolic AI, Interpretable and Explainable AI},
booktitle = {Proceedings of the 37th Conference on Neural Information Processing Systems (NeurIPS)},
year = {2023},
Url = {https://arxiv.org/abs/2306.01439},
Pages = {},
Crossref={https://github.com/k4ntz/LogicRL},
Note = {The limited priors required by neural networks make them the dominating choice to encode and learn policies using reinforcement learning (RL). However, they are also black-boxes, making it hard to understand the agent's behaviour, especially when working on the image level. Therefore, neuro-symbolic RL aims at creating policies that are interpretable in the first place. Unfortunately, interpretability is not explainability. To achieve both, we introduce Neurally gUided Differentiable loGic policiEs (NUDGE). NUDGE exploits trained neural network-based agents to guide the search of candidate-weighted logic rules, then uses differentiable logic to train the logic agents. Our experimental evaluation demonstrates that NUDGE agents can induce interpretable and explainable policies while outperforming purely neural ones and showing good flexibility to environments of different initial states and problem sizes.}
}
@misc{brack2023distilling,
Anote = {./images/brack2023distilling.png},
title={Distilling Adversarial Prompts from Safety Benchmarks: Report for the Adversarial Nibbler Challenge},
author={Manuel Brack and Patrick Schramowski and Kristian Kersting},
Howpublished = {arXiv preprint},
year = {2023},
Note = {Text-conditioned image generation models have recently achieved astonishing image quality and alignment results. Consequently, they are employed in a fast-growing number of applications. Since they are highly data-driven, relying on billion-sized datasets randomly scraped from the web, they also produce unsafe content. As a contribution to the Adversarial Nibbler challenge, we distill a large set of over 1,000 potential adversarial inputs from existing safety benchmarks. Our analysis of the gathered prompts and corresponding images demonstrates the fragility of input filters and provides further insights into systematic safety issues in current generative image models.},
Pages = {},
Keywords = {Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, Safety, Adversarial Prompting},
Url={../../papers/brack2023distilling.pdf}
}
@incollection{yu2023tpm_cc,
Anote = {./images/yu2023tpm_cc.png},
Author = {Zhongjie Yu and Martin Trapp and Kristian Kersting},
Booktitle = {Working Notes of the UAI Workshop on Tractable Probabilistic Modeling (TPM)},
Note = {In many real-world scenarios it is crucial to be able to reliably and efficiently reason under uncertainty while capturing complex relationships in data. Probabilistic circuits (PCs), a prominent family of tractable probabilistic models, offer a remedy to this challenge by composing simple, tractable distributions into a high-dimensional probability distribution. However, learning PCs on heterogeneous data is challenging and densities of some parametric distributions are not available in closed form, limiting their potential use. We introduce characteristic circuits (CCs), a family of tractable probabilistic models providing a unified formalization of distributions over heterogeneous data in the spectral domain. The one-to-one relationship between characteristic functions and probability measures enables us to learn high-dimensional distributions on heterogeneous data domains and facilitates efficient probabilistic inference even when no closed-form density function is available. We show that the structure and parameters of CCs can be learned efficiently from the data and find that CCs outperform state-of-the-art density estimators for heterogeneous data domains on common benchmark data sets.},
Keywords = {Characteristic Circuit, Characteristic Function, Probabilistic Circuit, Heterogeneous Data, Density Estimation},
Pages = {},
Title = {Characteristic Circuit},
Url = {./papers/yu2023tpm_cc.pdf},
crossref = {},
Year = {2023}
}
@misc{delfosse2023nudge_arxiv,
Anote={./images/delfosse2023nudge.png},
author = {Quentin Delfosse and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
title = {Interpretable and Explainable Logical Policies via Neurally Guided Symbolic Abstraction},
Keywords = {Reinforcement Learning, Neuro-Symbolic AI, Interpretable and Explainable AI},
Howpublished = {arXiv preprint arXiv:2306.01439},
year = {2023},
Url = {https://arxiv.org/abs/2306.01439},
Pages = {},
Crossref={https://github.com/k4ntz/LogicRL},
Note = {The limited priors required by neural networks make them the dominating choice to encode and learn policies using reinforcement learning (RL). However, they are also black-boxes, making it hard to understand the agent's behaviour, especially when working on the image level. Therefore, neuro-symbolic RL aims at creating policies that are interpretable in the first place. Unfortunately, interpretability is not explainability. To achieve both, we introduce Neurally gUided Differentiable loGic policiEs (NUDGE). NUDGE exploits trained neural network-based agents to guide the search of candidate-weighted logic rules, then uses differentiable logic to train the logic agents. Our experimental evaluation demonstrates that NUDGE agents can induce interpretable and explainable policies while outperforming purely neural ones and showing good flexibility to environments of different initial states and problem sizes.}
}
@misc{shindo2023neumann,
Anote={./images/shindo2023neumann.png},
author = {Hikaru Shindo and Viktor Pfanschilling and Devendra Singh Dhami and Kristian Kersting},
title = {Learning Differentiable Logic Programs for Abstract Visual Reasoning},
Keywords = {Differentiable Reasoning, Inductive Logic Programming, Neuro-Symbolic AI, Object-centric Learning, Graph Neural Network},
Howpublished = {arXiv preprint arXiv:2307.00928},
year = {2023},
Url = {https://arxiv.org/abs/2307.00928},
Pages = {},
Crossref={https://sites.google.com/view/neumann-tuda},
Note = {Visual reasoning is essential for building intelligent agents that understand the world and perform problem-solving beyond perception. Differentiable forward reasoning has been developed to integrate reasoning with gradient-based machine learning paradigms. However, due to the memory intensity, most existing approaches do not bring the best of the expressivity of first-order logic, excluding a crucial ability to solve abstract visual reasoning, where agents need to perform reasoning by using analogies on abstract concepts in different scenarios. To overcome this problem, we propose NEUro-symbolic Message-pAssiNg reasoNer (NEUMANN), which is a graph-based differentiable forward reasoner, passing messages in a memory-efficient manner and handling structured programs with functors. Moreover, we propose a computationally-efficient structure learning algorithm to perform explanatory program induction on complex visual scenes. To evaluate, in addition to conventional visual reasoning tasks, we propose a new task, visual reasoning behind-the-scenes, where agents need to learn abstract programs and then answer queries by imagining scenes that are not observed. We empirically demonstrate that NEUMANN solves visual reasoning tasks efficiently, outperforming neural, symbolic, and neuro-symbolic baselines.}
}
@incollection{sha2023nesy,
author = {Jingyuan Sha and Hikaru Shindo and Kristian Kersting and Devendra Singh Dhami},
title = {Neural-Symbolic Predicate Invention: Learning Relational Concepts from Visual Scenes},
Anote = {./images/sha2023nesypi.png},
Keywords = {Differentiable Reasoning, Inductive Logic Programming, Neuro-Symbolic AI, Object-centric Learning},
Note = {The predicates used for Inductive Logic Programming (ILP) systems are usually elusive and need to be hand-crafted in advance, which limits the generalization of the system when learning new rules without sufficient background knowledge. Predicate Invention (PI) for ILP is the problem of discovering new concepts that describe hidden relationships in the domain. PI can mitigate the generalization problem for ILP by inferring new concepts, giving the system a better vocabulary to compose logic rules. Although there are several PI approaches for symbolic ILP systems, PI for NeSy ILP systems that can handle visual input to learn logical rules using differentiable reasoning is relatively unaddressed. To this end, we propose a neural-symbolic approach, NeSy-𝜋, to invent predicates from visual scenes for NeSy ILP systems based on clustering and extension of relational concepts (𝜋 denotes the abbreviation of Predicate Invention). NeSy-𝜋 processes visual scenes as input using deep neural networks for visual perception and invents new concepts that support the task of classifying complex visual scenes. The invented concepts can be used by any NeSy ILP system instead of hand-crafted background knowledge. Our experiments show that the PI model is capable of inventing high-level concepts and solving complex visual logic patterns more efficiently and accurately in the absence of explicit background knowledge. Moreover, the invented concepts are explainable and interpretable, while also providing competitive results with state-of-the-art NeSy ILP systems based on given knowledge.},
year={2023},
booktitle = {Proceedings of the 17th International Workshop on Neural-Symbolic Learning and Reasoning (NeSy)},
Url = {https://www.cs.ox.ac.uk/isg/conferences/tmp-proceedings/NeSy2023/paper8.pdf}
}
@misc{helff2023vlol,
author = {Lukas Helff and Wolfgang Stammer and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
title = {V-LoL: A Diagnostic Dataset for Visual Logical Learning},
Anote = {./images/helff2023vlol.png},
Keywords = {Visual Logical Learning, Diagnostic Dataset, Benchmark, Dataset},
Note = {Despite the successes of recent developments in visual AI, different shortcomings still exist, ranging from missing exact logical reasoning, to limited abstract generalization abilities, to difficulties understanding complex and noisy scenes. Unfortunately, existing benchmarks were not designed to capture more than a few of these aspects. Whereas deep learning datasets focus on visually complex data but simple visual reasoning tasks, inductive logic datasets involve complex logical learning tasks but lack the visual component. To address this, we propose the visual logical learning dataset, V-LoL, that seamlessly combines visual and logical challenges. Notably, we introduce the first instantiation of V-LoL, V-LoL-Trains, a visual rendition of a classic benchmark in symbolic AI, the Michalski train problem. By incorporating intricate visual scenes and flexible logical reasoning tasks within a versatile framework, V-LoL provides a platform for investigating a wide range of visual logical learning challenges. We evaluate a variety of AI systems including traditional symbolic AI, neural AI, as well as neuro-symbolic AI. Our evaluations demonstrate that even state-of-the-art AI faces difficulties in dealing with visual logical learning challenges, highlighting unique advantages and limitations specific to each methodology. Overall, V-LoL opens up new avenues for understanding and enhancing current abilities in visual logical learning for AI systems.},
Howpublished = {arXiv preprint arXiv:2306.07743},
year = {2023},
Url = {https://arxiv.org/abs/2306.07743},
Pages = {},
Crossref={https://sites.google.com/view/v-lol}
}
@inproceedings{brack2023illume,
author = {Manuel Brack and Patrick Schramowski and Björn Deiseroth and Kristian Kersting},
title = {ILLUME: Rationalizing Vision-Language Models through Human Interactions},
Anote = {./images/brack2022illume.png},
Keywords = {Alignment, Self-Generated Explanations, XAI, Explanatory Interactive Learning},
Note = {Bootstrapping from pre-trained language models has been proven to be an efficient approach for building vision-language models (VLM) for tasks such as image captioning or visual question answering. However, outputs of these models rarely align with a user's rationales for specific answers. In order to improve this alignment and reinforce commonsense reasons, we propose a tuning paradigm based on human interactions with machine-generated data. Our ILLUME executes the following loop: Given an image-question-answer prompt, the VLM samples multiple candidate rationales, and a human critic provides minimal feedback via preference selection, used for fine-tuning. This loop increases the training data and gradually carves out the VLM's rationalization capabilities that are aligned with human intent. Our exhaustive experiments demonstrate that ILLUME is competitive with standard supervised fine-tuning while using significantly less training data and only requiring minimal feedback.},
year={2023},
month={Jul},
booktitle = {Proceedings of the 40th International Conference on Machine Learning (ICML)},
Url = {https://arxiv.org/pdf/2208.08241.pdf}
}
@inproceedings{schramowski2022safe,
Anote = {./images/schramowski2022safe.png},
title={Safe Latent Diffusion: Mitigating Inappropriate Degeneration in Diffusion Models},
author={Patrick Schramowski and Manuel Brack and Björn Deiseroth and Kristian Kersting},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2023},
month={Jun},
Note = {Text-conditioned image generation models have recently achieved astonishing results in image quality and text alignment and are consequently employed in a fast-growing number of applications. Since they are highly data-driven, relying on billion-sized datasets randomly scraped from the internet, they also suffer, as we demonstrate, from degenerated and biased human behavior. In turn, they may even reinforce such biases. To help combat these undesired side effects, we present safe latent diffusion (SLD). Specifically, to measure the inappropriate degeneration due to unfiltered and imbalanced training sets, we establish a novel image generation test bed, inappropriate image prompts (I2P), containing dedicated, real-world image-to-text prompts covering concepts such as nudity and violence. As our exhaustive empirical evaluation demonstrates, the introduced SLD removes and suppresses inappropriate image parts during the diffusion process, with no additional training required and no adverse effect on overall image quality or text alignment.},
Pages = {},
Keywords = {Safety, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, Ethics},
Url={https://arxiv.org/abs/2211.05105}
}
@inproceedings{sudhakaran2023iccv,
Anote={./images/sudhakaran2023iccv.png},
Url = {},
author = {Gopika Sudhakaran and Devendra Dhami and Kristian Kersting and Stefan Roth},
title = {Vision Relation Transformer for Unbiased Scene Graph Generation},
Note = {Recent years have seen a growing interest in Scene Graph Generation (SGG), a comprehensive visual scene understanding task that aims to predict entity relationships using a relation encoder-decoder pipeline stacked on top of an object encoder-decoder backbone. Unfortunately, current SGG methods suffer from an information loss regarding the entities' parts during the relation encoding process. To address this, we introduce the Vision rElation TransfOrmer (VETO), consisting of a novel entity part-level relation encoder. We further observe that many existing SGG methods claim to be unbiased, but are still biased towards either head or tail classes. To overcome this bias, we introduce a Mutually Exclusive ExperT (MEET) learning strategy that captures important relation features without bias towards head or tail classes. Exhaustive experimental results on the VG and GQA datasets demonstrate that VETO + MEET boosts the predictive performance by up to 47% over the state of the art while being ~10x smaller.},
year={2023},
Pages = {},
Keywords = {Transformer, Scene Graph Generation, Relational, Objects, Multimodal},
booktitle={Proceedings of the 19th IEEE/CVF International Conference on Computer Vision (ICCV)}
}
@inproceedings{struppek2023iccv,
Anote={./images/struppek2023iccv.png},
author = {Lukas Struppek and Dominik Hintersdorf and Kristian Kersting},
title = {Rickrolling the Artist: Injecting Backdoors into Text Encoders for Text-to-Image Synthesis},
Note = {While text-to-image synthesis currently enjoys great popularity among researchers and the general public, the security of these models has been neglected so far. Many text-guided image generation models rely on pre-trained text encoders from external sources, and their users trust that the retrieved models will behave as promised. Unfortunately, this might not be the case. We introduce backdoor attacks against text-guided generative models and demonstrate that their text encoders pose a major tampering risk. Our attacks only slightly alter an encoder so that no suspicious model behavior is apparent for image generations with clean prompts. By then inserting a single character trigger into the prompt, e.g., a non-Latin character or emoji, the adversary can trigger the model to either generate images with pre-defined attributes or images following a hidden, potentially malicious description. We empirically demonstrate the high effectiveness of our attacks on Stable Diffusion and highlight that the injection process of a single backdoor takes less than two minutes. Besides phrasing our approach solely as an attack, it can also force an encoder to forget phrases related to certain concepts, such as nudity or violence, and help to make image generation safer.},
year={2023},
Pages = {},
Keywords = {Backdoor Attack, Generative AI, CLIP, Text2Image Synthesis, Homoglyphs},
booktitle={Proceedings of the 19th IEEE/CVF International Conference on Computer Vision (ICCV)},
Url = {https://arxiv.org/abs/2211.02408},
}
@inproceedings{friedrich2023ecai,
author = {Felix Friedrich and Wolfgang Stammer and Patrick Schramowski and Kristian Kersting},
title = {Revision Transformers: Instructing Language Models to Change their Values},
Anote = {./images/friedrich2023ecai.png},
Keywords = {Transformer, Retriever, Revisions, Machine Ethics},
Note = {Current transformer language models (LM) are large-scale models with billions of parameters. They have been shown to provide high performances on a variety of tasks but are also prone to shortcut learning and bias. Addressing such incorrect model behavior via parameter adjustments is very costly. This is particularly problematic for updating dynamic concepts, such as moral values, which vary culturally or interpersonally. In this work, we question the current common practice of storing all information in the model parameters and propose the Revision Transformer (RiT) employing information retrieval to facilitate easy model updating. The specific combination of a large-scale pre-trained LM that inherently but also diffusely encodes world knowledge with a clear-structured revision engine makes it possible to update the model's knowledge with little effort and the help of user interaction. We exemplify RiT on a moral dataset and simulate user feedback demonstrating strong performance in model revision even with small data. This way, users can easily design a model regarding their preferences, paving the way for more transparent and personalized AI models.},
year={2023},
booktitle = {Proceedings of the 26th European Conference on Artificial Intelligence (ECAI)},
Url = {https://arxiv.org/pdf/2210.10332.pdf}
}
@inproceedings{sidheekh2023uai,
Anote={./images/sidheekh2023uai.png},
Url = {./papers/sidheekh2023uai.pdf},
author = {Sahil Sidheekh and Kristian Kersting and Sriraam Natarajan},
title = {Probabilistic Flow Circuits: Towards Unified Deep Models for Tractable Probabilistic Inference},
Note = {We consider the problem of increasing the expressivity of probabilistic circuits by augmenting them with the successful generative models of normalizing flows. To this effect, we theoretically establish the requirement of decomposability for such combinations to retain tractability of the learned models. Our model, called Probabilistic Flow Circuits, essentially extends circuits by allowing for normalizing flows at the leaves. Our empirical evaluation clearly establishes the expressivity and tractability of this new class of probabilistic circuits},
year={2023},
Pages = {},
Keywords = {Probabilistic Circuits, Normalizing Flows, Deep Learning, Tractability},
booktitle={Proceedings of the 39th Conference on Uncertainty in Artificial Intelligence (UAI)}
}
@inproceedings{ventola2023uai,
Anote={./images/ventola-braun2023tdi.png},
Url = {https://proceedings.mlr.press/v216/ventola23a/ventola23a.pdf},
author = {Fabrizio Ventola and Steven Braun and Zhongjie Yu and Martin Mundt and Kristian Kersting},
title = {Probabilistic Circuits That Know What They Don't Know},
Note = {Probabilistic circuits (PCs) are models that allow exact and tractable probabilistic inference. In contrast to neural networks, they are often assumed to be well-calibrated and robust to out-of-distribution (OOD) data. In this paper, we show that PCs are in fact not robust to OOD data, i.e., they don’t know what they don’t know. We then show how this challenge can be overcome by model uncertainty quantification. To this end, we propose tractable dropout inference (TDI), an inference procedure to estimate uncertainty by deriving an analytical solution to Monte Carlo dropout (MCD) through variance propagation. Unlike MCD in neural networks, which comes at the cost of multiple network evaluations, TDI provides tractable sampling-free uncertainty estimates in a single forward pass. TDI improves the robustness of PCs to distribution shift and OOD data, demonstrated through a series of experiments evaluating the classification confidence and uncertainty estimates on real-world data.},
year={2023},
Pages = {},
Keywords = {Probabilistic Circuits, Dropout, Out-Of-Distribution, Tractable Dropout Inference, Robustness},
booktitle={Proceedings of the 39th Conference on Uncertainty in Artificial Intelligence (UAI)}
}
@inproceedings{delfosse2023ecml,
Anote = {./images/delfosse2023ecml.png},
title={Boosting Object Representation Learning via Motion and Object Continuity},
author={Quentin Delfosse and Wolfgang Stammer and Thomas Rothenbächer and Dwarak Vittal and Kristian Kersting},
Note = {Recent unsupervised multi-object detection models have shown impressive performance improvements, largely attributed to novel architectural inductive biases. Unfortunately, despite their good object localization and segmentation capabilities, their object encodings may still be suboptimal for downstream reasoning tasks, such as reinforcement learning. To overcome this, we propose to exploit object motion and continuity (objects do not pop in and out of existence). This is accomplished through two mechanisms: (i) providing temporal loss-based priors on object locations, and (ii) a contrastive object continuity loss across consecutive frames. Rather than developing an explicit deep architecture, the resulting unsupervised Motion and Object Continuity (MOC) training scheme can be instantiated using any object detection model baseline. Our results show large improvements in the performances of variational and slot-based models in terms of object discovery, convergence speed and overall latent object representations, particularly for playing Atari games. Overall, we show clear benefits of integrating motion and object continuity for downstream reasoning tasks, moving beyond object representation learning based only on reconstruction as well as evaluation based only on instance segmentation quality.},
year={2023},
Pages = {},
Keywords = {Object Continuity, Object-Centric Deep Learning, Reinforcement Learning, Motion Supervision},
booktitle={Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML PKDD)},
Url={}
}
@inproceedings{kohaut2023md,
Anote={./images/kohaut2023md.png},
title={Mission Design for Unmanned Aerial Vehicles using Hybrid Probabilistic Logic Programs},
author={Simon Kohaut and Benedict Flade and Devendra Singh Dhami and Julian Eggert and Kristian Kersting},
booktitle={26th IEEE International Intelligent Transportation Systems Conference (ITSC)},
note = {Advanced Air Mobility (AAM) is a growing field
that demands a deep understanding of legal, spatial and temporal
concepts in navigation. Hence, any implementation of AAM is
forced to deal with the inherent uncertainties of human-inhabited
spaces. Enabling growth and innovation requires the creation of
a system for safe and robust mission design, i.e., the way we
formalise intentions and decide their execution as trajectories for
the Unmanned Aerial Vehicle (UAV). Although legal frameworks
have emerged to govern urban airspaces, their full integration
into the decision process of autonomous agents and operators
remains an open task. In this work we present ProMis, a system
architecture for probabilistic mission design. It links the data
available from various static and dynamic data sources with
legal text and operator requirements by following principles of
formal verification and probabilistic modeling. Using ProMis,
combining low-level perception and high-level rules in AAM
can be applied to the navigation frame of the UAV. To this
end, we employ Hybrid Probabilistic Logic Programs (HPLP)
as a unifying, intermediate representation between perception
and action-taking. Furthermore, we present methods to connect
ProMis with crowd-sourced map data by generating HPLP atoms
that represent spatial relations in a probabilistic fashion. Our
claims of the utility and generality of ProMis are supported by
experiments on a diverse set of scenarios and a discussion of the
computational demands associated with probabilistic missions.},
Url = {},
pages={},
year={2023},
Keywords = {Mission Design, Probabilistic Inference, Hybrid Probabilistic Logic}
}
@inproceedings{mathur2023psb,
Anote = {./images/mathur2023psb.png},
title={Exploiting Domain Knowledge as Causal Independencies in Modeling Gestational Diabetes},
author={Saurabh Mathur and Athresh Karanam and Predrag Radivojac and David M. Haas and Kristian Kersting and Sriraam Natarajan},
Note = {We consider the problem of modeling gestational diabetes in a clinical study and develop
a domain expert-guided probabilistic model that is both interpretable and explainable.
Specifically, we construct a probabilistic model based on causal independence (Noisy-Or)
from a carefully chosen set of features. We validate the efficacy of the model on the clinical
study and demonstrate the importance of the features and the causal independence model.},
year={2023},
Pages = {},
Keywords = {Bayesian Network, Noisy-Or, Causal Independence, Gestational Diabetes},
booktitle={Proceedings of the 28th Pacific Symposium on Biocomputing (PSB)},
Url={./papers/mathur2023psb.pdf}
}
@inproceedings{haemmerl2023fofindingsACL,
url = {./papers/haemmerl2023fofindingsACL.pdf},
author = {Katharina Hämmerl and Bjoern Deiseroth and Patrick Schramowski and Jindřich Libovický and Constantin Rothkopf and Alexander Fraser and Kristian Kersting},
title = {Speaking Multiple Languages Affects the Moral Bias of Language Models},
Anote = {./images/haemmerl2023fofindingsACL.png},
Keywords = {Alignment, Values, Social Norms, LLM, Multilingual},
Note = {Pre-trained multilingual language models (PMLMs) are commonly used when dealing
with data from multiple languages and crosslingual transfer. However, PMLMs are trained
on varying amounts of data for each language. In practice this means their performance is often much better on English than many other languages. We explore to what extent this also
applies to moral norms. Do the models capture moral norms from English and impose them
on other languages? Do the models exhibit random and thus potentially harmful beliefs in
certain languages? Both these issues could negatively impact cross-lingual transfer and potentially lead to harmful outcomes. In this paper, we (1) apply the MORALDIRECTION framework to multilingual models, comparing results
in German, Czech, Arabic, Chinese, and English, (2) analyse model behaviour on filtered
parallel subtitles corpora, and (3) apply the models to a Moral Foundations Questionnaire,
comparing with human responses from different countries. Our experiments demonstrate
that PMLMs do encode differing moral biases, but these do not necessarily correspond to cultural differences or commonalities in human opinions.},
year={2023},
booktitle = {Findings of the Association for Computational Linguistics (ACL)},
}
@misc{delfosse2023ocatari,
Anote={./images/delfosse2023ocatari.png},
author = {Quentin Delfosse and Jannis Blüml and Bjarne Gregori and Sebastian Sztwiertnia and Kristian Kersting},
title = {OCAtari: Object-Centric Atari 2600 Reinforcement Learning Environments},
Howpublished = {arXiv preprint arXiv:2306.08649},
year = {2023},
Url = {https://arxiv.org/abs/2306.08649},
Pages = {},
Crossref={https://github.com/k4ntz/OC_Atari},
Note = {Cognitive science and psychology suggest that object-centric representations of complex
scenes are a promising step towards enabling efficient abstract reasoning from low-level
perceptual features. Yet, most deep reinforcement learning approaches rely on only pixel-based
representations that do not capture the compositional properties of natural scenes. For this,
we need environments and datasets that allow us to work and evaluate object-centric approaches.
We present OCAtari, a set of environments that provides object-centric state representations of
Atari games, the most-used evaluation framework for deep RL approaches. OCAtari also allows for
RAM state manipulations of the games to change and create specific or even novel situations.},
Keywords = {Object-centric, Atari, Benchmarks, Reinforcement Learning}
}
@misc{braun2023cake,
Anote={./images/braun2023cake.png},
copyright = {Creative Commons Attribution 4.0 International},
title={Deep Classifier Mimicry without Data Access},
author={Steven Braun and Martin Mundt and Kristian Kersting},
year={2023},
Pages = {},
Howpublished={arXiv preprint arXiv:2306.02090},
Url={https://arxiv.org/pdf/2306.02090.pdf},
Note={Access to pre-trained models has recently emerged as a standard across numerous
machine learning domains. Unfortunately, access to the original data the models
were trained on may not equally be granted. This makes it tremendously challenging to fine-tune,
compress models, adapt continually, or to do any other type of data-driven update. We posit that
original data access may however not be required. Specifically, we propose Contrastive Abductive
Knowledge Extraction (CAKE), a model-agnostic knowledge distillation procedure that mimics deep
classifiers without access to the original data. To this end, CAKE generates pairs of noisy
synthetic samples and diffuses them contrastively toward a model's decision boundary. We
empirically corroborate CAKE's effectiveness using several benchmark datasets and various
architectural choices, paving the way for broad application.},
Keywords = {Fine-Tuning, Pre-Trained Models, Knowledge Distillation, Contrastive Abductive
Knowledge Extraction}
}
@incollection{weil2023knowYourEnemy,
Anote={./images/weil2023knowYourEnemy.png},
author={Jannis Weil and Johannes Czech and Tobias Meuser and Kristian Kersting},
title={Know your Enemy: Investigating Monte-Carlo Tree Search with Opponent Models in Pommerman},
booktitle={Proceedings of AAMAS Workshop Adaptive and Learning Agents (ALA)},
Url={https://alaworkshop2023.github.io/papers/ALA2023_paper_45.pdf},
Crossref={https://github.com/jw3il/PommerLearn},
year={2023},
Pages={},
Note={In combination with Reinforcement Learning, Monte-Carlo Tree Search has been shown to outperform human grandmasters in games such as Chess, Shogi and Go with little to no prior domain knowledge. However, most classical use cases only feature up to two players. Scaling the search to an arbitrary number of players presents a computational challenge, especially if decisions have to be planned over a longer time horizon. In this work, we investigate techniques that transform general-sum multiplayer games into single-player and two-player games that consider other agents to act according to given opponent models. For our evaluation, we focus on the challenging Pommerman environment which involves partial observability, a long time horizon and sparse rewards. In combination with our search methods, we investigate the phenomena of opponent modeling using heuristics and self-play. Overall, we demonstrate the effectiveness of our multiplayer search variants both in a supervised learning and reinforcement learning setting.},
Keywords={Multi-Agent Planning, Supervised Learning, Reinforcement Learning}
}
@article{bluml2023alphaze,
Anote = {./images/bluml2023alphaze.png},
Author = {Jannis Blüml and Johannes Czech and Kristian Kersting},
journal = {Frontiers in Artificial Intelligence},
Note = {In recent years, deep neural networks for strategy games have made significant progress. AlphaZero-like frameworks which combine Monte-Carlo tree search with reinforcement learning have been successfully applied to numerous games with perfect information. However, they have not been developed for domains where uncertainty and unknowns abound, and are therefore often considered unsuitable due to imperfect observations. Here, we challenge this view and argue that they are a viable alternative for games with imperfect information—a domain currently dominated by heuristic approaches or methods explicitly designed for hidden information, such as oracle-based techniques. To this end, we introduce a novel algorithm based solely on reinforcement learning, called AlphaZe**, which is an AlphaZero-based framework for games with imperfect information. We examine its learning convergence on the games Stratego and DarkHex and show that it is a surprisingly strong baseline, while using a model-based approach: it achieves similar win rates against other Stratego bots like Pipeline Policy Space Response Oracle (P2SRO), while not winning in direct comparison against P2SRO or reaching the much stronger numbers of DeepNash. Compared to heuristics and oracle-based approaches, AlphaZe** can easily deal with rule changes, e.g., when more information than usual is given, and drastically outperforms other approaches in this respect.},
Keywords = {Imperfect Information Games, Deep Neural Networks, Reinforcement Learning, AlphaZero, Monte-Carlo tree search, Perfect Information Monte-Carlo},
Pages = {},
volume = {6},
doi = {10.3389/frai.2023.1014561},
Title = {AlphaZe**: AlphaZero-like baselines for imperfect information games are surprisingly strong},
Url = {https://www.frontiersin.org/articles/10.3389/frai.2023.1014561/full},
Crossref = {https://github.com/QueensGambit/CrazyAra},
Year = {2023}}
@misc{czech2023representation,
Anote={./images/czech2023representation.png},
author = {Johannes Czech and Jannis Blüml and Kristian Kersting},
title = {Representation Matters: The Game of Chess Poses a Challenge to Vision Transformers},
Howpublished = {arXiv preprint arXiv:2304.14918},
year = {2023},
Url = {https://arxiv.org/pdf/2304.14918},
Pages = {},
Crossref = {https://github.com/QueensGambit/CrazyAra},
Note = {While transformers have gained the reputation as the "Swiss army knife of AI", no one has challenged them to master the game of chess, one of the classical AI benchmarks. Simply using vision transformers (ViTs) within AlphaZero does not master the game of chess, mainly because ViTs are too slow. Even making them more efficient using a combination of MobileNet and NextViT does not beat what actually matters: a simple change of the input representation and value loss, resulting in a greater boost of up to 180 Elo points over AlphaZero.},
Keywords = {Transformer, Input Representation, Loss Formulation, Chess, Monte-Carlo Tree Search, AlphaZero}
}
@misc{weil2023know,
Anote={./images/weil2023knowYourEnemy.png},
author={Jannis Weil and Johannes Czech and Tobias Meuser and Kristian Kersting},
title = {Know your Enemy: Investigating Monte-Carlo Tree Search with Opponent Models in Pommerman},
Howpublished = {arXiv preprint arXiv:2305.13206},
year = {2023},
Url = {https://arxiv.org/pdf/2305.13206},
Crossref={https://github.com/jw3il/PommerLearn},
Pages = {},
Note={In combination with Reinforcement Learning, Monte-Carlo Tree Search has been shown to outperform
human grandmasters in games such as Chess, Shogi and Go with little to no prior domain knowledge.
However, most classical use cases only feature up to two players. Scaling the search to an arbitrary
number of players presents a computational challenge, especially if decisions have to be planned
over a longer time horizon. In this work, we investigate techniques that transform general-sum
multiplayer games into single-player and two-player games that consider other agents to act
according to given opponent models. For our evaluation, we focus on the challenging Pommerman
environment which involves partial observability, a long time horizon and sparse rewards. In
combination with our search methods, we investigate the phenomena of opponent modeling using
heuristics and self-play. Overall, we demonstrate the effectiveness of our multiplayer search
variants both in a supervised learning and reinforcement learning setting.},
Keywords={Multi-Agent Planning, Supervised Learning, Reinforcement Learning}
}
@inproceedings{flade2021error,
Anote={./images/flade2021error.png},
title={Error Decomposition for Hybrid Localization Systems},
author={Flade, Benedict and Kohaut, Simon and Eggert, Julian},
booktitle={2021 IEEE International Intelligent Transportation Systems Conference (ITSC)},
Url = {https://www.honda-ri.de/pubs/pdf/4808.pdf},
pages={149--156},
year={2021},
organization={IEEE},
Keywords = {Accurate Global Positioning, Sensing, Vision, and Perception, Simulation and Modeling}
}
@misc{buhler2018dynamic,
Anote={./images/buhler2018dynamic.png},
title={Dynamic simulation model for an autonomous sailboat},
author={B{\"u}hler, Moritz and Heinz, Carsten and Kohaut, Simon},
year={2018},
Pages = {},
Howpublished={Universit{\"a}ts- und Landesbibliothek Darmstadt},
Url = {https://core.ac.uk/download/pdf/187766572.pdf},
Keywords = {Simulation and Modeling, Autonomous Sailing}
}
@incollection{brack2023mitigating,
Anote={./images/brack2023mitigating.png},
author = {Manuel Brack and Felix Friedrich and Patrick Schramowski and Kristian Kersting},
title = {Mitigating Inappropriateness in Image Generation: Can there be Value in Reflecting the World's Ugliness?},
booktitle = {Workshop on Challenges of Deploying Generative AI at ICML \& Workshop on Responsible Applied Artificial Intelligence (RAAIT) at ECAI},
year = {2023},
month={Jul},
Url = {https://arxiv.org/pdf/2305.18398},
Pages = {},
Note = {Text-conditioned image generation models have recently achieved astonishing results in image quality and text alignment and are consequently employed in a fast-growing number of applications. Since they are highly data-driven, relying on billion-sized datasets randomly scraped from the web, they also reproduce inappropriate human behavior. Specifically, we demonstrate inappropriate degeneration on a large-scale for various generative text-to-image models, thus motivating the need for monitoring and moderating them at deployment. To this end, we evaluate mitigation strategies at inference to suppress the generation of inappropriate content. Our findings show that we can use models' representations of the world's ugliness to align them with human preferences.},
Keywords = {Image Synthesis, Image Generation, Diffusion, AI Ethics, Inappropriateness, Evaluation, Mitigation}
}
@incollection{friedrich2023oneexp,
Anote = {./images/friedrich2023xiltypology.png},
title = {One explanation does not fit XIL},
author={Felix Friedrich and David Steinmann and Kristian Kersting},
booktitle = {Tiny Paper in the Proceedings of the International Conference on Learning Representations (ICLR)},
Note = {Current machine learning models produce outstanding results in many areas but, at the same time, suffer from shortcut learning and spurious correlations. To address such flaws, the explanatory interactive machine learning (XIL) framework has been proposed to revise a model by employing user feedback on a model's explanation. This work sheds light on the explanations used within this framework. In particular, we investigate simultaneous model revision through multiple explanation methods. To this end, we identified that one explanation does not fit XIL and propose considering multiple ones when revising models via XIL.},
Keywords = {Explanatory Interactive Machine Learning (XIL), Explainable Artificial Intelligence (XAI), Human-AI Interaction, Human-guided AI},
year={2023},
Url={../../papers/friedrich2023oneexp.pdf}
}
@misc{struppek23caia,
Anote={./images/caia.jpeg},
author = {Lukas Struppek and Dominik Hintersdorf and Felix Friedrich and Manuel Brack and Patrick Schramowski and Kristian Kersting},
title = {Class Attribute Inference Attacks: Inferring Sensitive Class Information by Diffusion-Based Attribute Manipulations},
Howpublished = {arXiv preprint arXiv:2303.09289},
year = {2023},
Url = {https://arxiv.org/pdf/2303.09289},
Pages = {},
Note = {Neural network-based image classifiers are powerful tools for computer vision tasks, but they inadvertently reveal sensitive
attribute information about their classes, raising concerns about their privacy. To investigate this privacy leakage, we introduce the first
Class Attribute Inference Attack (Caia), which leverages recent advances in text-to-image synthesis to infer sensitive attributes of individual
classes in a black-box setting, while remaining competitive with related white-box attacks. Our extensive experiments in the face recognition
domain show that Caia can accurately infer undisclosed sensitive attributes, such as an individual's hair color, gender and racial appearance,
which are not part of the training labels. Interestingly, we demonstrate that adversarial robust models are even more vulnerable to such privacy
leakage than standard models, indicating that a trade-off between robustness and privacy exists.},
Keywords = {Privacy, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion}
}
@inproceedings{uhlig2022dlam,
Anote = {./images/uhlig2022anomaly_detection_fuzzy_hashes.png},
title={Combining AI and AM – Improving Approximate Matching through Transformer Networks},
author={Frieder Uhlig and Lukas Struppek and Dominik Hintersdorf and Thomas Göbel and Harald Baier and Kristian Kersting},
year={2023},
booktitle={Proceedings of the Annual Digital Forensic Research Workshop (DFRWS) USA Conference},
Note = {Approximate matching is a well-known concept in digital forensics to determine the similarity between digital artifacts. An important use case of approximate matching is the reliable and efficient detection of case-relevant data structures on a blacklist (e.g., malware or corporate secrets), if only fragments of the original are available. For instance, if only a cluster of indexed malware is still present during the digital forensic investigation, the approximate matching algorithm shall be able to assign the fragment to the
blacklisted malware. However, traditional approximate matching functions like TLSH and ssdeep fail to detect files based on their fragments if the presented piece is relatively small compared to the overall file size (e.g., like one-third of the total file). A second well-known issue with traditional approximate matching algorithms is the lack of scaling due to the ever-increasing lookup databases. In this paper, we propose an improved matching algorithm based on transformer-based models from the field of natural language processing. We call our approach
Deep Learning Approximate Matching (DLAM). As a concept from artificial intelligence, DLAM gets knowledge of characteristic blacklisted patterns during its training phase. Then DLAM is able to detect the patterns in a typically much larger file, that is DLAM focuses on the use case of fragment detection. Our evaluation is inspired by two widespread blacklist use cases: the detection of malware (e.g., in JavaScript) and corporate secrets (e.g., pdf or office documents). We reveal that DLAM has three key advantages compared to the prominent conventional approaches TLSH and ssdeep. First, it makes the tedious extraction of known to be bad parts obsolete, which is necessary until now before any search for them with approximate matching algorithms. This allows efficient classification of files on a much larger scale, which is important due to exponentially increasing data to be investigated. Second, depending on the use case, DLAM achieves a similar (in case of mrsh-cf and mrsh-v2) or
even significantly higher accuracy (in case of ssdeep and TLSH) in recovering fragments of blacklisted files. For instance, in the case of JavaScript files, our assessment shows that DLAM provides an accuracy of 93\% on our test corpus, while TLSH and ssdeep show a classification accuracy of only 50\%. Third, we show that DLAM enables the detection of file correlations in the output of TLSH and ssdeep even for fragment sizes, where the respective matching function of TLSH and ssdeep fails.},
Pages = {},
Keywords = {Approximate Matching, Fuzzy Hashes, Anomaly Detection, Deep Learning},
Url={https://arxiv.org/pdf/2208.11367.pdf}
}
@misc{ye2023metalogic,
Anote = {./images/ye2022metalogic.png},
title={Differentiable Meta logical Programming},
author={Zihan Ye and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
Howpublished = {arXiv preprint arXiv:2211.11650},
year = {2022},
Pages = {},
Keywords = {Differentiable Reasoning, Meta-Interpreter, Object-centric Reasoning},
Url={https://arxiv.org/abs/2211.11650}
}
@misc{deiseroth2022logicrank,
Anote = {./images/deiseroth2022logicrank.png},
title={LogicRank: Logic Induced Reranking for Generative Text-to-Image Systems},
author={Björn Deiseroth and Patrick Schramowski and Hikaru Shindo and Devendra Singh Dhami and Kristian Kersting},
Howpublished = {arXiv preprint arXiv:2208.13518},
year = {2022},
Pages = {},
Keywords = {Differentiable Reasoning, Image Generation, CLIP},
Url={https://arxiv.org/abs/2208.13518}
}
@article{friedrich2023xiltypology,
Anote = {./images/friedrich2023xiltypology.png},
title = {A typology for exploring the mitigation of shortcut behaviour},
author={Felix Friedrich and Wolfgang Stammer and Patrick Schramowski and Kristian Kersting},
Journal = {Nature Machine Intelligence},
Note = {As machine learning models become larger, and are increasingly trained on large and uncurated datasets in weakly supervised mode, it becomes important to establish mechanisms for inspecting, interacting with and revising models. These are necessary to mitigate shortcut learning effects and to guarantee that the model’s learned knowledge is aligned with human knowledge. Recently, several explanatory interactive machine learning methods have been developed for this purpose, but each has different motivations and methodological details. In this work, we provide a unification of various explanatory interactive machine learning methods into a single typology by establishing a common set of basic modules. We discuss benchmarks and other measures for evaluating the overall abilities of explanatory interactive machine learning methods. With this extensive toolbox, we systematically and quantitatively compare several explanatory interactive machine learning methods. In our evaluations, all methods are shown to improve machine learning models in terms of accuracy and explainability. However, we found remarkable differences in individual benchmark tasks, which reveal valuable application-relevant aspects for the integration of these benchmarks in the development of future methods.},
Keywords = {Explanatory Interactive Machine Learning, XIL, Research Transparency, Research Comparability, Explainable Artificial Intelligence, XAI, Human-AI Interaction, Human-guided AI},
Publisher = {Nature Publishing Group},
year={2023},
volume={5},
pages={319--330},
issn={2522-5839},
doi={10.1038/s42256-023-00612-w},
url={https://doi.org/10.1038/s42256-023-00612-w}
}
@misc{friedrich2023fair,
Anote = {./images/ffriedrich_fair_2023.png},
title={Fair Diffusion: Instructing Text-to-Image Generation Models on Fairness},
author={Felix Friedrich and Manuel Brack and Dominik Hintersdorf and Lukas Struppek and Patrick Schramowski and Sasha Luccioni and Kristian Kersting},
Howpublished = {arXiv preprint arXiv:2302.10893},
year = {2023},
month={Feb},
Note = {Generative AI models have recently achieved astonishing results in quality and are consequently employed in a fast-growing number of applications. However, since they are highly data-driven, relying on billion-sized datasets randomly scraped from the internet, they also suffer from degenerated and biased human behavior, as we demonstrate. In fact, they may even reinforce such biases. To not only uncover but also combat these undesired effects, we present a novel strategy, called Fair Diffusion, to attenuate biases after the deployment of generative text-to-image models. Specifically, we demonstrate shifting a bias, based on human instructions, in any direction yielding arbitrarily new proportions for, e.g., identity groups. As our empirical evaluation demonstrates, this introduced control enables instructing generative image models on fairness, with no data filtering and additional training required.},
Pages = {},
Keywords = {Fairness, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, AI Ethics},
Url={https://arxiv.org/abs/2302.10893}
}
@misc{ventola-braun2023tdi,
Anote={./images/ventola-braun2023tdi.png},
Url = {https://arxiv.org/abs/2302.06544},
abbr={arXiv},
eprint={2302.06544},
archivePrefix={arXiv},
primaryClass={cs.AI},
author = {Ventola, Fabrizio and Braun, Steven and Yu, Zhongjie and Mundt, Martin and Kersting, Kristian},
keywords = {Machine Learning (cs.LG), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, I.2.0; G.3},
title = {Probabilistic Circuits That Know What They Don't Know},
publisher = {arXiv},
year = {2023},
copyright = {Creative Commons Attribution 4.0 International},
journal={arXiv preprint, arXiv:2302.06544},
Howpublished={arXiv preprint, arXiv:2302.06544},
Crossref={https://github.com/ml-research/tractable-dropout-inference}
}
@article{pfeuffer2023xil,
Anote = {./images/pfeuffer2023xil.png},
title = {Explanatory Interactive Machine Learning: Establishing an Action Design Research Process for Machine Learning Projects},
journal = {Business \& Information Systems Engineering},
pages = {},
year = {2023},
url = {},
author = {Nicolas Pfeuffer and Lorenz Baum and Wolfgang Stammer and Benjamin M. Abdel-Karim and Patrick Schramowski and Andreas M. Bucher and Christian Hügel and Gernot Rohde and Kristian Kersting and Oliver Hinz},
keywords = {Explainable AI, Interactive Learning, Machine Learning, Action Design Research, COVID-19, Imaging, Confounders },
Note = {The most promising standard machine learning methods can deliver highly accurate
classification results and often outperform standard white-box methods. However, for
humans, it is hardly possible to fully understand the rationale behind the black-box
results, and thus, these powerful methods hamper the creation of new knowledge on
the part of humans and the acceptance of this technology on a broader basis.
Explainable Artificial Intelligence tries to solve this problem by making the results more
interpretable, while Interactive Machine Learning integrates humans into the process of
insight discovery. We build upon recent successes of combining these two cutting-edge
technologies and propose how Explanatory Interactive Machine Learning (XIL) is
embedded in a generalizable Action Design Research (ADR) process – which we call
XIL-ADR. This approach can be used to analyze data, inspect models, and iteratively
improve them. We show the application of this process and use the diagnosis of viral
pneumonia, e.g., Covid-19, as an illustrative example. By this means, this paper also
illustrates how XIL-ADR can help identify shortcomings of standard machine learning
projects, gain new insights on the part of the human user, and thereby help to tap the
full potential of AI-based systems for organizations and research.}
}
@inproceedings{ramanan2023codscomad,
Anote = {./images/ramanan2023codscomad.jpg},
title={Active Feature Acquisition via Human Interaction in Relational Domains},
author={Nandini Ramanan and Phillip Odom and Kristian Kersting and Sriraam Natarajan},
booktitle={6th Joint International Conference on Data Science & Management of Data (CODS-COMAD)},
year={2023},
pages = {70--78},
url={./papers/ramanan2023codscomad.pdf},
Note={We consider the problem of interactive and explainable active feature elicitation in relational domains in which a small subset of data is fully observed while the rest of the data is minimally observed. The goal is to identify the most informative set of entities for whom acquiring additional relations would yield a more robust model. We assume the presence of a human expert who can interactively provide the relations. Thus there is a need for an explainable model. Consequently, we employ a relational tree-based distance metric to identify the most diverse set of relational examples (entities) to obtain more relational feature information on. The model that is learned iteratively is an interpretable and explainable model that is presented to the human expert for eliciting additional features. Our empirical evaluation demonstrates both the efficiency and the interpretability of the proposed approach.},
Keywords = {Human Interaction, Relational Learning, Feature Acquisition, Boosting, Non-Parametric}
}
@misc{brack2022Stable,
Anote = {./images/sega_graphic.png},
title={The Stable Artist: Steering Semantics in Diffusion Latent Space},
author={Manuel Brack and Patrick Schramowski and Felix Friedrich and Dominik Hintersdorf and Kristian Kersting},
Howpublished = {arXiv preprint arXiv:2212.06013},
year = {2022},
month={Dec},
Note = {Large, text-conditioned generative diffusion models have recently gained a lot of attention for their impressive performance in generating high-fidelity images from text alone. However, achieving high-quality results is almost unfeasible in a one-shot fashion. On the contrary, text-guided image generation involves the user making many slight changes to inputs in order to iteratively carve out the envisioned image. However, slight changes to the input prompt often lead to entirely different images being generated, and thus the control of the artist is limited in its granularity. To provide flexibility, we present the Stable Artist, an image editing approach enabling fine-grained control of the image generation process. The main component is semantic guidance (SEGA) which steers the diffusion process along variable numbers of semantic directions. This allows for subtle edits to images, changes in composition and style, as well as optimization of the overall artistic conception. Furthermore, SEGA enables probing of latent spaces to gain insights into the representation of concepts learned by the model, even complex ones such as 'carbon emission'. We demonstrate the Stable Artist on several tasks, showcasing high-quality image editing and composition.},
Pages = {},
Keywords = {Representations, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion, Concepts, Semantics},
Url={https://arxiv.org/abs/2212.06013}
}
@inproceedings{schuhmann2022laionb,
Anote = {./images/laion5b.jpg},
title={{LAION}-5B: An open large-scale dataset for training next generation image-text models},
author={Christoph Schuhmann and Romain Beaumont and Richard Vencu and Cade W Gordon and Ross Wightman and Mehdi Cherti and Theo Coombes and Aarush Katta and Clayton Mullis and Mitchell Wortsman and Patrick Schramowski and Srivatsa R Kundurthy and Katherine Crowson and Ludwig Schmidt and Robert Kaczmarczyk and Jenia Jitsev},
booktitle={Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
year={2022},
url={https://openreview.net/forum?id=M3Y74vmsMcY},
Note={We present LAION-5B, an open, publicly available dataset of 5.8B image-text pairs and validate it by reproducing results of training state-of-the-art CLIP models of different scale.},
Keywords = {multi-modal learning, large-scale datasets, reproducibility, open source, CLIP}
}
@inproceedings{alexopoulos2022how,
Anote = {./images/brack2022vullifetimes.png},
author = {Nikolaos Alexopoulos and
Manuel Brack and
Jan Philipp Wagner and
Tim Grube and
Max Mühlhäuser},
title = {How Long Do Vulnerabilities Live in the Code? A Large-Scale Empirical
Measurement Study on FOSS Vulnerability Lifetimes},
booktitle = {31st {USENIX} Security Symposium},
publisher = {{USENIX} Association},
year = {2022},
url = {https://www.usenix.org/conference/usenixsecurity22/presentation/alexopoulos},
Note={How long do vulnerabilities live in the repositories of large, evolving projects? Although the question has been identified as an interesting problem by the software community in online forums, it has not been investigated yet in adequate depth and scale, since the process of identifying the exact point in time when a vulnerability was introduced is particularly cumbersome. In this paper, we provide an automatic approach for accurately estimating how long vulnerabilities remain in the code (their lifetimes). Our method relies on the observation that while it is difficult to pinpoint the exact point of introduction for one vulnerability, it is possible to accurately estimate the average lifetime of a large enough sample of vulnerabilities, via a heuristic approach. With our approach, we perform the first large-scale measurement of Free and Open Source Software vulnerability lifetimes, going beyond approaches estimating lower bounds prevalent in previous research. We find that the average lifetime of a vulnerability is around 4 years, varying significantly between projects (~2 years for Chromium, ~7 years for OpenSSL). The distribution of lifetimes can be approximately described by an exponential distribution. There are no statistically significant differences between the lifetimes of different vulnerability types when considering specific projects. Vulnerabilities are getting older, as the average lifetime of fixed vulnerabilities in a given year increases over time, influenced by the overall increase of code age. However, they live less than non-vulnerable code, with an increasing spread over time for some projects, suggesting a notion of maturity that can be considered an indicator of quality. While the introduction of fuzzers does not significantly reduce the lifetimes of memory-related vulnerabilities, further research is needed to better understand and quantify the impact of fuzzers and other tools on vulnerability lifetimes and on the security of codebases.}
}
@misc{struppek22rickrolling,
Anote = {./images/struppek_rickrolling.jpg},
author = {Lukas Struppek and Dominik Hintersdorf and Kristian Kersting},
title = {Rickrolling the Artist: Injecting Invisible Backdoors into Text-Guided Image Generation Models},
Howpublished = {arXiv preprint arXiv:2211.02408},
year = {2022},
month={Nov},
Note = {While text-to-image synthesis currently enjoys great popularity among researchers and the general public, the security of these models has been neglected so far. Many text-guided image generation models rely on pre-trained text encoders from external sources, and their users trust that the retrieved models will behave as promised. Unfortunately, this might not be the case. We introduce backdoor attacks against text-guided generative models and demonstrate that their text encoders pose a major tampering risk. Our attacks only slightly alter an encoder so that no suspicious model behavior is apparent for image generations with clean prompts. By then inserting a single non-Latin character into the prompt, the adversary can trigger the model to either generate images with pre-defined attributes or images following a hidden, potentially malicious description. We empirically demonstrate the high effectiveness of our attacks on Stable Diffusion and highlight that the injection process of a single backdoor takes less than two minutes. Besides phrasing our approach solely as an attack, it can also force an encoder to forget phrases related to certain concepts, such as nudity or violence, and help to make image generation safer.},
Pages = {},
Keywords = {Backdoor Attacks, Text-to-Image Synthesis, Text-Guided Image Generation, Stable Diffusion},
Url={https://arxiv.org/pdf/2211.02408.pdf}
}
@unpublished{kersting2022welt_clone,
Anote = {./images/WeltAmSonntag.png},
title={Machines like us would have feelings too},
author={Kristian Kersting},
year={2022},
Howpublished = {Welt am Sonntag, October 16, 2022},
Publisher = {},
Note = {Should we clone humans using AI and robotics, if we were able to do so?},
Keywords = {AI, Clone, Humans, Machines, Optimus, Tesla, Elon Musk, Ethics, Robots},
Url={}
}
@unpublished{kersting2022welt_art,
Anote = {./images/dieWelt_logo.png},
title={The Image Machine},
author={Kristian Kersting},
year={2022},
Howpublished = {Die Welt, September 16, 2022},
Publisher = {},
Note = {An AI-generated image wins an art competition. It won't end the world. We need to figure out how to use this AI to our advantage. },
Keywords = {AI, Benefits, Stable Diffusion, Generative Model, Art, Creativity},
Url={}
}
@article{schwegmann2022energy,
Anote = {./images/schwegmann2022energy.jpg},
title = {Enabling Virtual Met Masts for wind energy applications through machine learning-methods},
journal = {Energy and AI},
pages = {100209},
year = {2022},
issn = {2666-5468},
url = {https://www.sciencedirect.com/science/article/pii/S2666546822000556},
author = {Sandra Schwegmann and Janosch Faulhaber and Sebastian Pfaffel and Zhongjie Yu and Martin Dörenkämper and Kristian Kersting and Julia Gottschall},
keywords = {Virtual Met Mast (VMM), Wind power, Machine learning, Reanalysis data, Site assessment, Weather Research and Forecasting Model (WRF), Measure-Correlate-Predict (MCP)},
Note = {As wind is the basis of all wind energy projects, a precise knowledge about its availability is needed. For an analysis of the site-specific wind conditions, Virtual Meteorological Masts (VMMs) are frequently used. VMMs make use of site calibrated numerical data to provide precise wind estimates during all phases of a wind energy project. Typically, numerical data are used for the long-term correlation that is required for estimating the yield of new wind farm projects. However, VMMs can also be used to fill data gaps or during the operational phase as an additional reference data set to detect degrading sensors. The value of a VMM directly depends on its ability and precision to reproduce site-specific environmental conditions. Commonly, linear regression is used as state of the art to correct reference data to the site-specific conditions. In this study, a framework of 10 different machine-learning methods is tested to investigate the benefit of more advanced methods on two offshore and one onshore site. We find significantly improved correlations between the VMMs and the reference data when using more advanced methods and present the most promising ones. The K-Nearest Neighbors and AdaBoost regressors show the best results in our study, but Multi-Output Mixture of Gaussian Processes is also very promising. The use of more advanced regression models leads to decreased uncertainties; hence those methods should find their way into industrial applications. The recommended regression models can serve as a starting point for the development of end-user applications and services.}
}
@misc{struppek22homoglyphs,
Anote = {./images/struppek_biased_artist.jpg},
author = {Lukas Struppek and Dominik Hintersdorf and Felix Friedrich and Manuel Brack and Patrick Schramowski and Kristian Kersting},
title = {Exploiting Cultural Biases via Homoglyphs in Text-to-Image Synthesis},
Howpublished = {arXiv preprint arXiv:2209.08891},
year = {2022},
month={Sep},
Note = {Models for text-to-image synthesis, such as DALL-E 2 and Stable Diffusion, have recently drawn a lot of interest from academia and the general public. These models are capable of producing high-quality images that depict a variety of concepts and styles when conditioned on textual descriptions. However, these models adopt cultural characteristics associated with specific Unicode scripts from their vast amount of training data, which may not be immediately apparent. We show that by simply inserting single non-Latin characters in a textual description, common models reflect cultural stereotypes and biases in their generated images. We analyze this behavior both qualitatively and quantitatively, and identify a model’s text encoder as the root cause of the phenomenon. Additionally, malicious users or service providers may try to intentionally bias the image generation to create racist stereotypes by replacing Latin characters with similarly-looking characters from non-Latin scripts, so-called homoglyphs. To mitigate such unnoticed script attacks, we propose a novel homoglyph unlearning method to fine-tune a text encoder, making it robust against homoglyph manipulations.},
Pages = {},
Keywords = {Text-to-Image Synthesis, Text-Guided Image Generation, DALL-E 2, Stable Diffusion, Computer Vision},
Url={https://arxiv.org/pdf/2209.08891.pdf}
}
@misc{hintersdorf2022clipping_privacy,
Anote = {./images/hintersdorf2022clipping_privacy.png},
title={Does CLIP Know My Face?},
author={Dominik Hintersdorf and Lukas Struppek and Manuel Brack and Felix Friedrich and Patrick Schramowski and Kristian Kersting},
year={2022},
month={Sep},
Howpublished = {arXiv preprint arXiv:2209.07341},
Note = {With the rise of deep learning in various applications, privacy concerns around the protection of training data have become a critical area of research. Whereas prior studies have focused on privacy risks in single-modal models, we introduce a novel method to assess privacy for multi-modal models, specifically vision-language models like CLIP. The proposed Identity Inference Attack (IDIA) reveals whether an individual was included in the training data by querying the model with images of the same person. Letting the model choose from a wide variety of possible text labels, the model reveals whether it recognizes the person and, therefore, was used for training. Our large-scale experiments on CLIP demonstrate that individuals used for training can be identified with very high accuracy. We confirm that the model has learned to associate names with depicted individuals, implying the existence of sensitive information that can be extracted by adversaries. Our results highlight the need for stronger privacy protection in large-scale models and suggest that IDIAs can be used to prove the unauthorized use of data for training and to enforce privacy laws.},
Pages = {},
Keywords = {Identity Inference Attacks, Privacy, Computer Vision, Pre-trained models, CLIP, Deep Learning},
Url={https://arxiv.org/pdf/2209.07341.pdf}
}
@unpublished{kersting2022welt_goodbad,
Anote = {./images/WeltAmSonntag.png},
title={Good AI, Bad AI},
author={Kristian Kersting},
year={2022},
Howpublished = {Welt am Sonntag, August 21, 2022, page 20},
Publisher = {},
Note = {If we want to make progress on AI, we need to talk more about the benefits it can bring to all of us.},
Keywords = {AI, Benefits, Downsides, Optimistic, Digital Biology, Protein Folding, Gene Expression, Education},
Url={}
}
@incollection{trapp2022corsets_tpm,
Anote = {./images/trapp2022corsets_tpm.png},
booktitle = {Working Notes of the 5th Workshop on Tractable Probabilistic Modeling (TPM)},
title={Towards Coreset Learning in Probabilistic Circuits},
author={Martin Trapp and Steven Lang and Aastha Shah and Martin Mundt and Kristian Kersting and Arno Solin},
Note = {Probabilistic circuits (PCs) are a powerful family of tractable probabilistic models, guaranteeing efficient and exact computation of many probabilistic inference queries. However, their sparsely structured nature makes computations on large data sets challenging to perform. Recent works have focused on tensorized representations of PCs to speed up computations on large data sets. In this work, we present an orthogonal approach by sparsifying the set of n observations and show that finding a coreset of $k \ll n$ data points can be phrased as a monotone submodular optimisation problem which can be solved greedily for deterministic PCs of |G| nodes in O(k n |G|). Finally, we verify on a series of data sets that our greedy algorithm outperforms random selection.},
year={2022},
Pages = {},
Keywords = {Coresets, Probabilistic Circuits, Deep Learning, Efficient Learning, Generative Model},
Url={./papers/trapp2022corsets_tpm.pdf}
}
@article{brugger2022pythopathology,
Anote = {./images/brugger2022pythopathology.png},
Author = {Anna Brugger and Facundo Ispizua Yamati and Abel Barreto and Stefan Paulus and Patrick Schramowski and Kristian Kersting and Ulrike Steiner and Susanne Neugart and Anne-Katrin Mahlein},
Journal = {Phytopathology},
Keywords = {Hyperspectral imaging, sugar beet, HPLC, plant metabolites, machine learning, UV-range},
Note = {Fungal infections trigger defense or signaling responses in plants, leading to various changes in plant metabolites. The changes in metabolites, for example chlorophyll or flavonoids, have long been detectable using time-consuming destructive analytical methods including high-performance liquid chromatography or photometric determination. Recent plant phenotyping studies have revealed that hyperspectral imaging (HSI) in the UV-range can be used to link spectral changes with changes in plant metabolites. To compare established destructive analytical methods with new non-destructive hyperspectral measurements, the interaction between sugar beet leaves and the pathogens Cercospora beticola, which causes Cercospora leaf spot disease (CLS), and Uromyces betae, which causes sugar beet rust (BR), was investigated. With the help of destructive analyses, we showed that both diseases have different effects on chlorophylls, carotenoids, flavonoids, and several phenols. Non-destructive hyperspectral measurements in the UV-range revealed different effects of CLS and BR on plant metabolites resulting in distinct reflectance patterns. Both diseases resulted in specific spectral changes that allowed differentiation between the two diseases. Machine learning algorithms enabled the differentiation between the symptom classes and recognition of the two sugar beet diseases. Feature importance analysis identified specific wavelengths important to the classification, highlighting the utility of the UV-range. The study demonstrates that HSI in the UV-range is a promising, non-destructive tool to investigate the influence of plant diseases on plant physiology and biochemistry.},
Pages = {44-45},
Publisher = {APS Publications},
Title = {Hyperspectral imaging in the UV-range allows for differentiation of sugar beet diseases based on changes of secondary plant metabolites},
Url = {https://doi.org/10.1094/PHYTO-03-22-0086-R},
volume = {113},
isbn = {},
number = {1},
Year = {2023}
}
@inproceedings{yu2022whittle,
Anote = {./images/yu2022whittle.png},
title={Predictive Whittle Networks for Time Series},
author={Zhongjie Yu and Fabrizio Ventola and Nils Thoma and Devendra Singh Dhami and Martin Mundt and Kristian Kersting},
Note = {Recent developments have shown that modeling in the spectral domain improves the accuracy in time series forecasting. However, state-of-the-art neural spectral forecasters do not generally yield trustworthy predictions. In particular, they lack the means to gauge predictive likelihoods and provide uncertainty estimates. We propose predictive Whittle networks to bridge this gap, which exploit both the advances of neural forecasting in the spectral domain and leverage tractable likelihoods of probabilistic circuits. For this purpose, we propose a novel Whittle forecasting loss that makes use of these predictive likelihoods to guide the training of the neural forecasting component. We demonstrate how predictive Whittle networks improve real-world forecasting accuracy, while also allowing a transformation back into the time domain, in order to provide the necessary feedback of when the model's prediction may become erratic.},
year={2022},
Pages = {},
Keywords = {Short Time Fourier Transform, Whittle Likelihood, Probabilistic Circuits, Deep Neural Networks, LSTM},
booktitle={Proceedings of the 38th Conference on Uncertainty in Artificial Intelligence (UAI), PMLR 180:2320-2330},
Url={./papers/yu2022whittle.pdf}
}
@inproceedings{mundt2022clevacompass,
booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
title={CLEVA-Compass: A Continual Learning EValuation Assessment Compass to Promote Research Transparency and Comparability},
author={Martin Mundt and Steven Lang and Quentin Delfosse and Kristian Kersting},
year={2022},
Keywords={Continual Learning, Lifelong Learning, Machine Learning Evaluation},
Anote={./images/CLEVA-Compass.png},
Note={What is the state of the art in continual machine learning? Although a natural question for predominant static benchmarks, the notion to train systems in a lifelong manner entails a plethora of additional challenges with respect to set-up and evaluation. The latter have recently sparked a growing amount of critiques on prominent algorithm-centric perspectives and evaluation protocols being too narrow, resulting in several attempts at constructing guidelines in favor of specific desiderata or arguing against the validity of prevalent assumptions. In this work, we depart from this mindset and argue that the goal of a precise formulation of desiderata is an ill-posed one, as diverse applications may always warrant distinct scenarios. Instead, we introduce the Continual Learning EValuation Assessment Compass, CLEVA-Compass for short. The compass provides the visual means to both identify how approaches are practically reported and how works can simultaneously be contextualized in the broader literature landscape. In addition to promoting compact specification in the spirit of recent replication trends, the CLEVA-Compass thus provides an intuitive chart to understand the priorities of individual systems, where they resemble each other, and what elements are missing towards a fair comparison.},
Crossref={https://github.com/ml-research/CLEVA-Compass},
Url={https://openreview.net/pdf?id=rHMaBYbkkRJ}
}
@inproceedings{stammer2022cvpr,
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
title={Interactive Disentanglement: Learning Concepts by Interacting with their Prototype Representations},
author={Wolfgang Stammer and Marius Memmel and Patrick Schramowski and Kristian Kersting},
year={2022},
Keywords={Explanatory Interactive Learning, XAI, Concept Swapping Networks, Prototype Networks, Elementary Concept Learning},
Anote={./images/stammer2022cvpr.png},
Note={Learning visual concepts from raw images without strong supervision is a challenging task. In this work, we show the
advantages of prototype representations for understanding and revising the latent space of neural concept learners. For this purpose,
we introduce interactive Concept Swapping Networks (iCSNs), a novel framework for learning concept-grounded representations via weak
supervision and implicit prototype representations. iCSNs learn to bind conceptual information to specific prototype slots by swapping
the latent representations of paired images. This semantically grounded and discrete latent space facilitates human
understanding and human-machine interaction. We support this claim by conducting experiments on our novel data set ``Elementary
Concept Reasoning'' (ECR), focusing on visual concepts shared by geometric objects.},
Crossref={},
Url={./papers/stammer2022cvpr.pdf}
}
@inproceedings{struppek2022ppa,
Anote = {./images/struppek_ppa.jpg},
title={Plug & Play Attacks: Towards Robust and Flexible Model Inversion Attacks},
author={Lukas Struppek and Dominik Hintersdorf and Antonio De Almeida Correia and Antonia Adler and Kristian Kersting},
Note = {Model inversion attacks (MIAs) aim to create synthetic images that reflect the class-wise characteristics from a target classifier's training data by exploiting the model's learned knowledge. Previous research has developed generative MIAs using generative adversarial networks (GANs) as image priors that are tailored to a specific target model. This makes the attacks time- and resource-consuming, inflexible, and susceptible to distributional shifts between datasets. To overcome these drawbacks, we present Plug \& Play Attacks that loosen the dependency between the target model and image prior and enable the use of a single trained GAN to attack a broad range of targets with only minor attack adjustments needed. Moreover, we show that powerful MIAs are possible even with publicly available pre-trained GANs and under strong distributional shifts, whereas previous approaches fail to produce meaningful results. Our extensive evaluation confirms the improved robustness and flexibility of Plug \& Play Attacks and their ability to create high-quality images revealing sensitive class characteristics.},
year={2022},
Pages = {20522--20545},
Keywords = {Model Inversion Attacks, Secure AI, Privacy, Generative Adversarial Networks},
booktitle={Proceedings of the 39th International Conference on Machine Learning (ICML)},
Url={https://proceedings.mlr.press/v162/struppek22a/struppek22a.pdf}
}
@inproceedings{hintersdorf2022ijcai_trust,
Anote = {./images/hintersdorf2021mi.png},
title={To Trust or Not To Trust Prediction Scores for Membership Inference Attacks},
author={Dominik Hintersdorf and Lukas Struppek and Kristian Kersting},
year={2022},
booktitle={Proceedings of the 31st International Joint Conference on Artificial Intelligence and the 25th European Conference on Artificial Intelligence ({IJCAI-ECAI})},
Note = {Membership inference attacks (MIAs) aim to determine whether a specific sample was used to train a predictive model. Knowing this may indeed lead to a privacy breach. Most MIAs, however, make use of the model's prediction scores - the probability of each output given some input - following the intuition that the trained model tends to behave differently on its training data. We argue that this is a fallacy for many modern deep network architectures. Consequently, MIAs will miserably fail since overconfidence leads to high false-positive rates not only on known domains but also on out-of-distribution data and implicitly acts as a defense against MIAs. Specifically, using generative adversarial networks, we are able to produce a potentially infinite number of samples falsely classified as part of the training data. In other words, the threat of MIAs is overestimated, and less information is leaked than previously assumed. Moreover, there is actually a trade-off between the overconfidence of models and their susceptibility to MIAs: the more classifiers know when they do not know, making low confidence predictions, the more they reveal the training data.},
Pages = {},
Keywords = {Membership Inference Attacks, Privacy, Deep Learning, ResNets, Tradeoff, Overconfidence, OOD},
Url={https://www.ijcai.org/proceedings/2022/0422.pdf}
}
@inproceedings{xie2022ijcai_verify,
Anote = {./images/xie2022ijcai_verify.png},
title={Neuro-Symbolic Verification of Deep Neural Networks},
author={Xuan Xie and Kristian Kersting and Daniel Neider},
year={2022},
booktitle={Proceedings of the 31st International Joint Conference on Artificial Intelligence and the 25th European Conference on Artificial Intelligence ({IJCAI-ECAI})},
Note = {Formal verification has emerged as a powerful approach
to ensure the safety and reliability of deep
neural networks. However, current verification tools
are limited to only a handful of properties that can
be expressed as first-order constraints over the inputs
and output of a network. While adversarial robustness
and fairness fall under this category, many
real-world properties (e.g., “an autonomous vehicle
has to stop in front of a stop sign”) remain outside
the scope of existing verification technology. To mitigate
this severe practical restriction, we introduce
a novel framework for verifying neural networks,
named neuro-symbolic verification. The key idea is
to use neural networks as part of the otherwise logical
specification, enabling the verification of a wide
variety of complex, real-world properties, including
the one above. Moreover, we demonstrate how
neuro-symbolic verification can be implemented on
top of existing verification infrastructure for neural
networks, making our framework easily accessible
to researchers and practitioners alike.},
Pages = {},
Keywords = {Verification, Neurosymbolic, Deep Networks, Relative Verification},
Url={./papers/xie2022ijcai_verify.pdf}
}
@inproceedings{moosavi2022adapters,
Anote = {./images/moosavi2022adapters.png},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics (NAACL 2022) },
title={Adaptable Adapters},
author={Nafise Sadat Moosavi and Quentin Delfosse and Kristian Kersting and Iryna Gurevych},
Note = {State-of-the-art pretrained NLP models contain
a hundred million to trillion parameters.
Adapters provide a parameter-efficient alternative
for the full finetuning in which we can only
finetune lightweight neural network layers on
top of pretrained weights. Adapter layers are
initialized randomly. However, existing work
uses the same adapter architecture—i.e., the
same adapter layer on top of each layer of the
pretrained model—for every dataset, regardless
of the properties of the dataset or the amount
of available training data. In this work, we
introduce adaptable adapters that contain (1)
learning different activation functions for different
layers and different input data, and (2) a
learnable switch to select and only use the beneficial
adapter layers. We show that adaptable
adapters achieve on-par performances with the
standard adapter architecture while using a considerably
smaller number of adapter layers. In
addition, we show that the selected adapter architecture
by adaptable adapters transfers well
across different data settings and similar tasks.
We propose to use adaptable adapters for designing
efficient and effective adapter architectures.
The resulting adapters (a) contain about
50% of the learning parameters of the standard
adapter and are therefore more efficient
at training and inference, and require less storage
space, and (b) achieve considerably higher
performances in low-data settings},
year={2022},