Python 算法教程(70):图算法与高级应用

前言

图论是高级算法的核心,也是技术面试中的高频考点。本文将深入讲解图的遍历、最短路径、拓扑排序等经典算法,并提供完整的 Python 实现和实际应用案例。

一、图的基本概念

1.1 图的定义

图(Graph)是由顶点(Vertex)和边(Edge)组成的数据结构,用于表示对象之间的关系。

# 图的表示方法

# 1. 邻接表(推荐)
graph = {
    "A": ["B", "C"],
    "B": ["A", "D", "E"],
    "C": ["A", "F"],
    "D": ["B"],
    "E": ["B", "F"],
    "F": ["C", "E"]
}

# 2. 邻接矩阵(适合稠密图)
# 使用二维数组表示,matrix[i][j] = 1 表示 i 到 j 有边

# 3. 带权图(邻接表)
weighted_graph = {
    "A": {"B": 4, "C": 2},
    "B": {"A": 4, "D": 3, "E": 1},
    "C": {"A": 2, "F": 5},
    "D": {"B": 3},
    "E": {"B": 1, "F": 3},
    "F": {"C": 5, "E": 3}
}

1.2 图的分类

  • 有向图 vs 无向图 - 边是否有方向
  • 带权图 vs 无权图 - 边是否有权重
  • 连通图 - 任意两点都可达
  • 有向无环图(DAG) - 无环的有向图

二、深度优先搜索(DFS)

2.1 递归实现

def dfs_recursive(graph, start, visited=None):
    """
    深度优先搜索 - 递归实现
    时间复杂度:O(V + E),空间复杂度:O(V)
    """
    if visited is None:
        visited = set()
    
    visited.add(start)
    print(start, end=" ")  # 访问节点
    
    for neighbor in graph[start]:
        if neighbor not in visited:
            dfs_recursive(graph, neighbor, visited)
    
    return visited

# 测试
graph = {
    "A": ["B", "C"],
    "B": ["A", "D", "E"],
    "C": ["A", "F"],
    "D": ["B"],
    "E": ["B", "F"],
    "F": ["C", "E"]
}

dfs_recursive(graph, "A")  # 输出:A B D E F C

2.2 迭代实现(使用栈)

def dfs_iterative(graph, start):
    """
    深度优先搜索 - 迭代实现
    使用显式栈避免递归深度限制
    """
    visited = set()
    stack = [start]
    result = []
    
    while stack:
        node = stack.pop()
        if node not in visited:
            visited.add(node)
            result.append(node)
            # 逆序添加邻居,保证正序访问
            for neighbor in reversed(graph[node]):
                if neighbor not in visited:
                    stack.append(neighbor)
    
    return result

# 测试
dfs_iterative(graph, "A")  # 输出:["A", "B", "D", "E", "F", "C"]

2.3 DFS 应用场景

  • 路径查找
  • 连通分量检测
  • 拓扑排序
  • 环检测
  • 迷宫求解

三、广度优先搜索(BFS)

3.1 标准实现

from collections import deque

def bfs(graph, start):
    """
    广度优先搜索
    时间复杂度:O(V + E),空间复杂度:O(V)
    """
    visited = {start}
    queue = deque([start])
    result = []
    
    while queue:
        node = queue.popleft()
        result.append(node)
        
        for neighbor in graph[node]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
    
    return result

# 测试
bfs(graph, "A")  # 输出:["A", "B", "C", "D", "E", "F"]

3.2 最短路径(无权图)

from collections import deque

def bfs_shortest_path(graph, start, end):
    """
    BFS 查找最短路径(无权图)
    返回从 start 到 end 的最短路径
    """
    if start == end:
        return [start]
    
    visited = {start}
    queue = deque([(start, [start])])  # (当前节点,路径)
    
    while queue:
        node, path = queue.popleft()
        
        for neighbor in graph[node]:
            if neighbor not in visited:
                if neighbor == end:
                    return path + [neighbor]
                visited.add(neighbor)
                queue.append((neighbor, path + [neighbor]))
    
    return None  # 无路径

# 测试
path = bfs_shortest_path(graph, "A", "F")
print(path)  # 输出:["A", "C", "F"] 或 ["A", "B", "E", "F"]

3.3 BFS 应用场景

  • 无权图最短路径
  • 层级遍历
  • 社交网络中的"六度空间"
  • 爬虫的层级抓取
  • 广播消息传播

四、Dijkstra 最短路径算法

4.1 标准实现

import heapq

def dijkstra(graph, start):
    """
    Dijkstra 算法 - 单源最短路径(带权图)
    时间复杂度:O((V + E) log V),空间复杂度:O(V)
    注意:不支持负权边
    """
    # 初始化距离
    distances = {node: float("inf") for node in graph}
    distances[start] = 0
    
    # 优先队列:(距离,节点)
    pq = [(0, start)]
    
    while pq:
        curr_dist, curr_node = heapq.heappop(pq)
        
        # 跳过已找到更短路径的节点
        if curr_dist > distances[curr_node]:
            continue
        
        # 遍历邻居
        for neighbor, weight in graph[curr_node].items():
            distance = curr_dist + weight
            
            # 找到更短路径
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                heapq.heappush(pq, (distance, neighbor))
    
    return distances

# 测试
weighted_graph = {
    "A": {"B": 4, "C": 2},
    "B": {"A": 4, "D": 3, "E": 1},
    "C": {"A": 2, "F": 5},
    "D": {"B": 3},
    "E": {"B": 1, "F": 3},
    "F": {"C": 5, "E": 3}
}

distances = dijkstra(weighted_graph, "A")
print(distances)  # 输出:{"A": 0, "B": 4, "C": 2, "D": 7, "E": 5, "F": 8}

4.2 带路径追踪的实现

import heapq

def dijkstra_with_path(graph, start, end):
    """
    Dijkstra 算法 - 返回最短路径和距离
    """
    distances = {node: float("inf") for node in graph}
    distances[start] = 0
    previous = {node: None for node in graph}  # 记录前驱节点
    
    pq = [(0, start)]
    
    while pq:
        curr_dist, curr_node = heapq.heappop(pq)
        
        if curr_dist > distances[curr_node]:
            continue
        
        if curr_node == end:
            break
        
        for neighbor, weight in graph[curr_node].items():
            distance = curr_dist + weight
            
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                previous[neighbor] = curr_node
                heapq.heappush(pq, (distance, neighbor))
    
    # 重建路径
    path = []
    curr = end
    while curr is not None:
        path.append(curr)
        curr = previous[curr]
    path.reverse()
    
    return distances[end], path if distances[end] != float("inf") else None

# 测试
dist, path = dijkstra_with_path(weighted_graph, "A", "F")
print(f"最短距离:{dist}")  # 输出:8
print(f"路径:{path}")      # 输出:["A", "C", "F"]

4.3 应用场景

  • 地图导航(最短路径)
  • 网络路由
  • 任务调度
  • 社交网络中的关系强度

五、拓扑排序

5.1 Kahn 算法(BFS 实现)

from collections import deque, defaultdict

def topological_sort_kahn(graph, num_nodes):
    """
    拓扑排序 - Kahn 算法
    时间复杂度:O(V + E)
    适用于:有向无环图(DAG)
    """
    # 计算入度
    in_degree = defaultdict(int)
    for node in graph:
        if node not in in_degree:
            in_degree[node] = 0
        for neighbor in graph[node]:
            in_degree[neighbor] += 1
    
    # 入度为 0 的节点入队
    queue = deque([node for node in range(num_nodes) if in_degree[node] == 0])
    result = []
    
    while queue:
        node = queue.popleft()
        result.append(node)
        
        for neighbor in graph.get(node, []):
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                queue.append(neighbor)
    
    # 检查是否有环
    if len(result) != num_nodes:
        return None  # 存在环
    
    return result

# 测试:课程安排问题
# 0: 高等数学,1: 线性代数,2: 概率论,3: 机器学习
course_graph = {
    0: [1, 2],  # 高等数学是线性代数和概率论的先修
    1: [3],     # 线性代数是机器学习的先修
    2: [3],     # 概率论是机器学习的先修
    3: []
}

topological_sort_kahn(course_graph, 4)  # 输出:[0, 1, 2, 3] 或 [0, 2, 1, 3]

5.2 DFS 实现

def topological_sort_dfs(graph, num_nodes):
    """
    拓扑排序 - DFS 实现
    """
    visited = set()
    temp_mark = set()  # 临时标记,用于检测环
    result = []
    
    def visit(node):
        if node in temp_mark:
            return False  # 存在环
        if node in visited:
            return True
        
        temp_mark.add(node)
        
        for neighbor in graph.get(node, []):
            if not visit(neighbor):
                return False
        
        temp_mark.remove(node)
        visited.add(node)
        result.append(node)
        return True
    
    for node in range(num_nodes):
        if node not in visited:
            if not visit(node):
                return None  # 存在环
    
    result.reverse()
    return result

六、回溯算法

6.1 全排列

def permute(nums):
    """
    全排列问题
    时间复杂度:O(n! * n),空间复杂度:O(n)
    """
    result = []
    
    def backtrack(path, remaining):
        if not remaining:
            result.append(path[:])  # 复制当前路径
            return
        
        for i in range(len(remaining)):
            # 做选择
            path.append(remaining[i])
            # 递归
            backtrack(path, remaining[:i] + remaining[i+1:])
            # 撤销选择
            path.pop()
    
    backtrack([], nums)
    return result

# 测试
permute([1, 2, 3])
# 输出:[[1,2,3], [1,3,2], [2,1,3], [2,3,1], [3,1,2], [3,2,1]]

6.2 N 皇后问题

def solve_n_queens(n):
    """
    N 皇后问题
    时间复杂度:O(N!),空间复杂度:O(N)
    """
    result = []
    board = [["." for _ in range(n)] for _ in range(n)]
    
    # 记录已占用的列和对角线
    cols = set()
    diag_positive = set()  # 主对角线 (row + col)
    diag_negative = set()  # 副对角线 (row - col)
    
    def backtrack(row):
        if row == n:
            result.append(["".join(r) for r in board])
            return
        
        for col in range(n):
            # 检查是否冲突
            if (col in cols or 
                (row + col) in diag_positive or 
                (row - col) in diag_negative):
                continue
            
            # 放置皇后
            board[row][col] = "Q"
            cols.add(col)
            diag_positive.add(row + col)
            diag_negative.add(row - col)
            
            # 递归
            backtrack(row + 1)
            
            # 撤销
            board[row][col] = "."
            cols.remove(col)
            diag_positive.remove(row + col)
            diag_negative.remove(row - col)
    
    backtrack(0)
    return result

# 测试
solutions = solve_n_queens(4)
for solution in solutions:
    for row in solution:
        print(row)

6.3 组合总和

def combination_sum(candidates, target):
    """
    组合总和问题
    找出所有和为 target 的组合(数字可重复使用)
    """
    result = []
    
    def backtrack(start, path, total):
        if total == target:
            result.append(path[:])
            return
        if total > target:
            return
        
        for i in range(start, len(candidates)):
            path.append(candidates[i])
            # 注意:可以重复使用,所以从 i 开始(不是 i+1)
            backtrack(i, path, total + candidates[i])
            path.pop()
    
    backtrack(0, [], 0)
    return result

# 测试
combination_sum([2, 3, 6, 7], 7)
# 输出:[[2, 2, 3], [7]]

七、并查集(Union-Find)

class UnionFind:
    """
    并查集 - 用于处理不相交集合的合并与查询
    时间复杂度:接近 O(1)(均摊)
    """
    
    def __init__(self, n):
        self.parent = list(range(n))
        self.rank = [0] * n  # 树的深度
    
    def find(self, x):
        """查找根节点(带路径压缩)"""
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]
    
    def union(self, x, y):
        """合并两个集合(按秩合并)"""
        root_x = self.find(x)
        root_y = self.find(y)
        
        if root_x == root_y:
            return False
        
        # 按秩合并
        if self.rank[root_x] < self.rank[root_y]:
            self.parent[root_x] = root_y
        elif self.rank[root_x] > self.rank[root_y]:
            self.parent[root_y] = root_x
        else:
            self.parent[root_y] = root_x
            self.rank[root_x] += 1
        
        return True
    
    def connected(self, x, y):
        """判断是否连通"""
        return self.find(x) == self.find(y)

# 应用:判断图中是否有环
def has_cycle(edges, n):
    """
    判断无向图是否有环
    edges: [(u, v), ...] 边的列表
    n: 节点数
    """
    uf = UnionFind(n)
    
    for u, v in edges:
        if uf.connected(u, v):
            return True  # 已连通,添加此边会形成环
        uf.union(u, v)
    
    return False

# 测试
edges = [(0, 1), (1, 2), (2, 3), (0, 3)]
has_cycle(edges, 4)  # 输出:True

八、最小生成树(MST)

8.1 Kruskal 算法

def kruskal_mst(edges, n):
    """
    Kruskal 算法 - 最小生成树
    时间复杂度:O(E log E)
    """
    # 按权重排序
    edges.sort(key=lambda x: x[2])
    
    uf = UnionFind(n)
    mst = []
    total_weight = 0
    
    for u, v, weight in edges:
        if uf.union(u, v):
            mst.append((u, v, weight))
            total_weight += weight
            
            if len(mst) == n - 1:
                break
    
    return mst, total_weight

# 测试
edges = [
    (0, 1, 4), (0, 2, 2), (1, 2, 1),
    (1, 3, 5), (2, 3, 8), (2, 4, 10),
    (3, 4, 2), (3, 5, 6), (4, 5, 3)
]

mst, weight = kruskal_mst(edges, 6)
print(f"最小生成树:{mst}")
print(f"总权重:{weight}")

九、实战应用

9.1 社交网络分析

def find_friends_clusters(friendships, n):
    """
    查找朋友圈(连通分量)
    """
    uf = UnionFind(n)
    
    for u, v in friendships:
        uf.union(u, v)
    
    # 统计每个朋友圈
    clusters = defaultdict(list)
    for i in range(n):
        root = uf.find(i)
        clusters[root].append(i)
    
    return list(clusters.values())

# 测试
friendships = [(0, 1), (1, 2), (3, 4)]
find_friends_clusters(friendships, 5)
# 输出:[[0, 1, 2], [3, 4]]

9.2 地图导航

def find_shortest_route(graph, start, end):
    """
    查找最短路线(Dijkstra 应用)
    graph: {地点:{邻居:距离,...}, ...}
    """
    dist, path = dijkstra_with_path(graph, start, end)
    
    if path is None:
        return "无法到达"
    
    route = " -> ".join(path)
    return f"路线:{route}\n总距离:{dist}"

# 测试
map_graph = {
    "家": {"地铁站": 5, "公交站": 3},
    "地铁站": {"家": 5, "公司": 20},
    "公交站": {"家": 3, "公司": 15},
    "公司": {"地铁站": 20, "公交站": 15}
}

print(find_shortest_route(map_graph, "家", "公司"))

十、算法复杂度对比

算法 时间复杂度 空间复杂度 适用场景
DFS O(V + E) O(V) 路径查找、环检测
BFS O(V + E) O(V) 无权图最短路径
Dijkstra O((V + E) log V) O(V) 带权图最短路径
拓扑排序 O(V + E) O(V) 任务调度、依赖关系
回溯 O(n!) O(n) 排列组合、N 皇后
并查集 O(α(n)) O(n) 连通性判断
Kruskal O(E log E) O(V) 最小生成树

总结

图算法是高级算法的核心内容,掌握这些算法对于解决实际问题和技术面试都至关重要。

关键要点:

  1. DFS vs BFS - DFS 适合深度探索,BFS 适合层级遍历和最短路径
  2. Dijkstra - 带权图最短路径的标准解法
  3. 拓扑排序 - 处理有依赖关系的任务调度
  4. 回溯 - 解决排列组合问题的通用框架
  5. 并查集 - 高效处理连通性问题

建议多刷 LeetCode 相关题目巩固理解,特别是 DFS、BFS 和回溯算法,这是面试中出现频率最高的图算法。

发表回复

后才能评论