% Preprint deposited on HAL; no HAL identifier is recorded in this entry.
@article{niang2024deep,
  author  = {Niang, Seydina Ousmane and Bouveyron, Charles and Corneli, Marco and Latouche, Pierre and Boutin, R{\'e}mi},
  title   = {The Deep Latent Position Block Model for the Clustering of Nodes in Multi-Graphs},
  journal = {{HAL} preprint},
  year    = {2024},
}
The deep latent position block model for the block clustering and latent representation of networks
Rémi Boutin, Pierre Latouche, and Charles Bouveyron
The current surge in data has led to a significant increase in the use of networks to model relationships between different objects, represented as nodes. Given the potential for the number of nodes to be substantial, it is essential to summarise network information through node clustering methods. To ensure interpretable results, it is also crucial to employ relevant visualisation techniques to depict the network. To tackle both issues, we propose a new methodology called the deep latent position block model (Deep LPBM). This simultaneously provides a network visualisation coherent with block modelling, allowing a clustering more general than community detection methods, as well as a continuous representation of nodes in a latent space given by partial membership vectors. Our methodology is based on a variational autoencoder strategy, relying on a graph convolutional network, with a specifically designed decoder. The inference is performed using both variational and stochastic approximations to ensure optimal results. To efficiently select the number of clusters, we compare three model selection criteria. An extensive benchmark as well as an evaluation of the partial membership estimation are provided, and we conclude with an analysis of the French political blogosphere network and a comparison with another methodology to illustrate the novelty provided by Deep LPBM results.
% arXiv preprint: the identifier is stored in eprint/archiveprefix rather than in a journal field.
@misc{boutin2024deeplpbm,
  author        = {Boutin, R{\'e}mi and Latouche, Pierre and Bouveyron, Charles},
  title         = {The deep latent position block model for the block clustering and latent representation of networks},
  year          = {2024},
  eprint        = {2412.01302},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.01302},
}
2023
The Deep Latent Position Topic Model for Clustering and Representation of Networks with Textual Edges
Rémi Boutin, Pierre Latouche, and Charles Bouveyron
Numerical interactions leading to users sharing textual content published by others are naturally represented by a network where the individuals are associated with the nodes and the exchanged texts with the edges. To understand those heterogeneous and complex data structures, clustering nodes into homogeneous groups as well as rendering a comprehensible visualisation of the data is mandatory. To address both issues, we introduce Deep-LPTM, a model-based clustering strategy relying on a variational graph auto-encoder approach as well as a probabilistic model to characterise the topics of discussion. Deep-LPTM allows building a joint representation of the nodes and of the edges in two embedding spaces. The parameters are inferred using a variational inference algorithm. We also introduce IC2L, a model selection criterion specifically designed to choose models with relevant clustering and visualisation properties. An extensive benchmark study on synthetic data is provided. In particular, we find that Deep-LPTM better recovers the partitions of the nodes than the state-of-the-art ETSBM and STBM. Finally, the emails of the Enron company are analysed and visualisations of the results are presented, with meaningful highlights of the graph structure.
% arXiv preprint: the identifier is stored in eprint/archiveprefix rather than in a journal field.
@misc{boutin2023deep,
  author        = {Boutin, R{\'e}mi and Latouche, Pierre and Bouveyron, Charles},
  title         = {The Deep Latent Position Topic Model for Clustering and Representation of Networks with Textual Edges},
  year          = {2023},
  eprint        = {2304.08242},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2304.08242},
}
Embedded Topics in the Stochastic Block Model
Rémi Boutin, Charles Bouveyron, and Pierre Latouche
Communication networks such as emails or social networks are now ubiquitous and their analysis has become a strategic field. In many applications, the goal is to automatically extract relevant information by looking at the nodes and their connections. Unfortunately, most of the existing methods focus on analysing the presence or absence of edges and textual data is often discarded. However, all communication networks actually come with textual data on the edges. In order to take into account this specificity, we consider in this paper networks for which two nodes are linked if and only if they share textual data. We introduce a deep latent variable model, called ETSBM, that handles embedded topics in order to simultaneously perform clustering on the nodes while modelling the topics used between the different clusters. ETSBM extends both the stochastic block model (SBM) and the embedded topic model (ETM) which are core models for studying networks and corpora, respectively. The inference is done using a variational-Bayes expectation-maximisation algorithm combined with a stochastic gradient descent. The methodology is evaluated on synthetic data and on a real-world dataset.
% Journal article in Statistics and Computing, volume 33, issue 5.
@article{boutin2023embedded,
  author    = {Boutin, R{\'e}mi and Bouveyron, Charles and Latouche, Pierre},
  title     = {Embedded Topics in the Stochastic Block Model},
  journal   = {Statistics and Computing},
  volume    = {33},
  number    = {5},
  pages     = {1--20},
  year      = {2023},
  publisher = {Springer},
}
Deep Graphical Models and Inference Strategies for the Analysis of Networks Comprising Textual Edges
In this manuscript, we shall develop new methodologies to cluster nodes of networks, possibly holding textual edges. We aim at providing an end-to-end modelling, capable of using the texts exchanged between the nodes as well as the network topology to extract salient information at the core of the dataset. This work is motivated by questions arising in different fields such as social sciences. Gathering and understanding large datasets from social media may help researchers to answer questions regarding the way a policy may be perceived, for instance. We adopt a probabilistic modelling framework to classify nodes and analyse texts. Among other things, these models provide information on the uncertainty of our estimates as well as a framework that has proven to be robust historically. Furthermore, in order to benefit from the efficiency of deep neural networks to encode complex types of data, our methodologies strive to include them within a probabilistic framework. Several analyses of real data are provided. In particular, during several months preceding the 2017 French presidential election, every publication on one social media platform involving one of the candidates, as well as its republications, was gathered to form a database. Our methodology helps to understand the groups present on the social media as well as the way interactions were taking place during this particular time period. Python implementations associated with the methodologies developed in this manuscript have been made public.