Data-Structures-Assignment/community_detection.py at main · 3CornSoups/Data-Structures-Assignment · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""
社区发现模块
Community Detection Module
"""

import random
from collections import defaultdict
from graph_storage import SocialNetwork


def label_propagation_algorithm(network, iterations=100):
    """
    基于标签传播的社区发现算法
    Label Propagation Algorithm for Community Detection

    Args:
        network (SocialNetwork): 社交网络对象 Social network object
        iterations (int): 最大迭代次数 Maximum number of iterations

    Returns:
        dict: 社区划分结果 Community partitioning result
    """
    # 初始化每个节点的标签 Initialize labels for each node
    labels = {i: i for i in range(network.n)}

    # 迭代更新标签 Iterate to update labels
    for iteration in range(iterations):
        # 随机打乱节点顺序 Randomly shuffle node order
        nodes = list(range(network.n))
        random.shuffle(nodes)

        unchanged = True

        for node in nodes:
            # 获取邻居节点 Get neighbor nodes
            neighbors = network.get_following(node)

            # 统计邻居节点的标签频率 Count frequency of neighbor labels
            label_counts = defaultdict(int)
            for neighbor in neighbors:
                label_counts[labels[neighbor]] += 1

            # 如果没有邻居，跳过 If no neighbors, skip
            if not label_counts:
                continue

            # 选择出现频率最高的标签 Select the most frequent label
            max_count = max(label_counts.values())
            candidates = [label for label, count in label_counts.items() if count == max_count]

            # 随机选择一个标签（如果有多个最高频率的标签）Randomly select a label if multiple have max frequency
            new_label = min(candidates)  # 使用min确保结果一致性 Use min for consistent results

            # 更新标签 Update label if different
            if labels[node] != new_label:
                labels[node] = new_label
                unchanged = False

        # 如果没有标签发生变化，提前结束 If no labels changed, end early
        if unchanged:
            break

    # 将相同标签的节点分组 Group nodes with the same label
    communities = defaultdict(list)
    for node, label in labels.items():
        communities[label].append(node)

    return dict(communities)


def louvain_algorithm(network):
    """
    Louvain算法（模块度优化）- 简化版本
    Louvain Algorithm (Modularity Optimization) - Simplified Version

    Args:
        network (SocialNetwork): 社交网络对象 Social network object

    Returns:
        dict: 社区划分结果 Community partitioning result
    """
    # 简化的Louvain算法实现 Simplified Louvain algorithm implementation
    # 实际的Louvain算法更复杂，这里提供基础框架
    # The actual Louvain algorithm is more complex, here's a basic framework

    # 初始化每个节点为一个独立社区 Initialize each node as a separate community
    communities = {i: [i] for i in range(network.n)}

    # 这里可以实现完整的Louvain算法，由于复杂性较高，暂时返回标签传播的结果
    # The complete Louvain algorithm can be implemented here, but due to complexity, returning LPA result
    return label_propagation_algorithm(network)


class CommunityDetectionNetwork(SocialNetwork):
    """
    具有社区发现功能的社交网络类
    Social Network class with community detection functionality
    """

    def community_detection(self, algorithm='lpa', iterations=100):
        """
        社区发现算法
        Community Detection Algorithm

        Args:
            algorithm (str): 算法类型 ('lpa' for Label Propagation Algorithm, 'louvain' for Louvain Algorithm)
            iterations (int): 最大迭代次数 Maximum number of iterations

        Returns:
            dict: 社区划分结果，键为社区标签，值为属于该社区的节点列表
                  Community partitioning result, key is community label, value is list of nodes in that community
        """
        if algorithm == 'lpa':
            return label_propagation_algorithm(self, iterations)
        elif algorithm == 'louvain':
            return louvain_algorithm(self)
        else:
            raise ValueError(f"不支持的算法类型: {algorithm}")


# 测试示例
if __name__ == "__main__":
    print("社区发现 - 示例运行")
    print("Community Detection - Example Run")

    # 创建一个社区检测网络 Create a community detection network
    network = CommunityDetectionNetwork(10, directed=True, storage_type="adj_list")

    # 添加一些边形成社区结构 Add some edges to form community structure
    edges = [
        (0, 1), (0, 2), (1, 2),  # 社区1 Community 1
        (3, 4), (3, 5), (4, 5),  # 社区2 Community 2
        (6, 7), (6, 8), (7, 8), (7, 9), (8, 9),  # 社区3 Community 3
        (2, 3), (5, 6)  # 连接不同社区 Connect different communities
    ]

    for u, v in edges:
        network.add_edge(u, v)

    print(f"添加了 {len(edges)} 条边来形成社区结构")
    print(f"Added {len(edges)} edges to form community structure")

    # 使用标签传播算法检测社区 Use label propagation algorithm to detect communities
    communities = network.community_detection(algorithm='lpa', iterations=50)
    print(f"检测到 {len(communities)} 个社区:")
    print(f"Detected {len(communities)} communities:")

    for community_id, members in communities.items():
        print(f"  社区 {community_id}: {members}")
        print(f"  Community {community_id}: {members}")

    # 使用Louvain算法检测社区（简化版） Use Louvain algorithm to detect communities (simplified version)
    try:
        communities_louvain = network.community_detection(algorithm='louvain')
        print(f"\nLouvain算法检测到 {len(communities_louvain)} 个社区:")
        print(f"Louvain algorithm detected {len(communities_louvain)} communities:")

        for community_id, members in communities_louvain.items():
            print(f"  社区 {community_id}: {members}")
            print(f"  Community {community_id}: {members}")
    except Exception as e:
        print(f"Louvain算法运行出错: {e}")
        print(f"Error running Louvain algorithm: {e}")