Java Stream的一种直观的调试方法

引言

在Java开发中，Stream API为我们提供了强大的数据处理能力，但复杂的流操作往往难以调试。传统的断点调试只能看到单条数据的处理过程，无法直观地观察整个数据流的生命周期。本文将介绍一种更直观的Stream调试方法。

问题场景

先来看下面这段简单的代码：

package cn.zeros;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Minimal Stream example: filters a fixed list of people by age and groups
 * the survivors by country.
 *
 * <p>The class is self-contained; {@link Person} declares its accessors and
 * {@code toString()} explicitly, so no annotation processor is required to
 * compile it.
 */
public class JavaStreamDebug {

    /**
     * Builds the sample data and runs the filter + group-by pipeline.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Sample data.
        var people = List.of(
                new Person("Tom", "UK", 23),
                new Person("Bob", "UK", 30),
                new Person("Kimi", "CN", 25),
                new Person("Tony", "CN", 16),
                new Person("Cube", "CN", 45),
                new Person("Pang", "CN", 25)
        );

        // Simple Stream pipeline: keep people older than 20, then group them by country.
        // The resulting map is not used any further in this example.
        Map<String, List<Person>> collect = people.stream()
                .filter(p -> p.getAge() > 20)
                .collect(Collectors.groupingBy(Person::getCountry));
    }

    /** Immutable value holder describing one person of the sample data. */
    static class Person {
        private final String name;
        private final String country;
        private final int age;

        /**
         * @param name    the person's name
         * @param country the country code the person is grouped by
         * @param age     the person's age in years
         */
        Person(String name, String country, int age) {
            this.name = name;
            this.country = country;
            this.age = age;
        }

        /** @return the person's name */
        public String getName() {
            return name;
        }

        /** @return the person's country code */
        public String getCountry() {
            return country;
        }

        /** @return the person's age in years */
        public int getAge() {
            return age;
        }

        /**
         * Renders the person as {@code JavaStreamDebug.Person(name=..., country=..., age=...)},
         * the layout Lombok's {@code @ToString} generates for a nested class.
         */
        @Override
        public String toString() {
            return "JavaStreamDebug.Person(name=" + name
                    + ", country=" + country
                    + ", age=" + age + ")";
        }
    }
}

传统调试方法的局限性

如果按照我们平时打断点的方式调试流的处理过程，就像这样：

传统的断点调试只能看到流的最终结果，或者单条数据依次流经 filter、map 等操作的过程；一旦整个数据处理流程比较复杂，调试起来就十分困难。

复杂场景示例

例如下面这些更复杂的场景：

package cn.zeros;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Collection of Stream API demos that run one after another against a fixed
 * list of people: grouping, nested grouping, max-by, flatMap, summary
 * statistics, a weighted average, peek tracing, a composed collector, a
 * parallel stream and a hand-written {@link java.util.stream.Collector}.
 *
 * <p>The class is self-contained; {@link Person} declares its accessors and
 * {@code toString()} explicitly, so no annotation processor is required to
 * compile it.
 */
public class JavaStreamDebug {

    /**
     * Runs all ten demos in order and prints their results to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Sample data.
        var people = List.of(
                new Person("Tom", "UK", 23, "Engineer"),
                new Person("Bob", "UK", 30, "Manager"),
                new Person("Kimi", "CN", 25, "Developer"),
                new Person("Tony", "CN", 16, "Student"),
                new Person("Cube", "CN", 45, "Director"),
                new Person("Pang", "CN", 25, "Developer"),
                new Person("Alice", "US", 28, "Designer"),
                new Person("John", "US", 35, "Manager"),
                new Person("Emma", "UK", 22, "Intern"),
                new Person("David", "US", 40, "Architect")
        );

        // 1. Grouping with a downstream aggregation: average age per country.
        // NOTE: the printed header also mentions a head count, but only the average is computed.
        System.out.println("\n=== 1. 按国家分组，计算每个国家的平均年龄和人数 ===");
        Map<String, Double> avgAgeByCountry = people.stream()
                .collect(Collectors.groupingBy(
                        Person::getCountry,
                        Collectors.averagingDouble(Person::getAge)
                ));
        avgAgeByCountry.forEach((country, avgAge) ->
                System.out.println(country + ": 平均年龄 " + String.format("%.2f", avgAge)));

        // 2. Two-level grouping (country -> job) of adults; each bucket is printed oldest first.
        System.out.println("\n=== 2. 按国家分组，再按职业分组，并排序 ===");
        Map<String, Map<String, List<Person>>> groupedByCountryAndJob = people.stream()
                .filter(p -> p.getAge() >= 18) // drop minors
                .collect(Collectors.groupingBy(
                        Person::getCountry,
                        Collectors.groupingBy(Person::getJob)
                ));
        groupedByCountryAndJob.forEach((country, jobMap) -> {
            System.out.println("\n" + country + ":");
            jobMap.forEach((job, personList) -> {
                System.out.println("  " + job + ": " + personList.size() + "人");
                personList.stream()
                        .sorted(Comparator.comparing(Person::getAge).reversed())
                        .forEach(p -> System.out.println("    " + p));
            });
        });

        // 3. Oldest person per country.
        System.out.println("\n=== 3. 找出每个国家最年长的人 ===");
        Map<String, Optional<Person>> oldestByCountry = people.stream()
                .collect(Collectors.groupingBy(
                        Person::getCountry,
                        Collectors.maxBy(Comparator.comparing(Person::getAge))
                ));
        oldestByCountry.forEach((country, oldest) ->
                oldest.ifPresent(p -> System.out.println(country + "最年长: " + p)));

        // 4. flatMap: every first-name / last-name combination.
        System.out.println("\n=== 4. 生成所有可能的姓名组合（用于测试flatMap） ===");
        List<String> firstNames = List.of("Alex", "Chris", "Sam");
        List<String> lastNames = List.of("Smith", "Johnson", "Brown");

        List<String> fullNames = firstNames.stream()
                .flatMap(firstName ->
                        lastNames.stream()
                                .map(lastName -> firstName + " " + lastName)
                )
                .collect(Collectors.toList());
        fullNames.forEach(System.out::println);

        // 5. Summary statistics over all ages.
        System.out.println("\n=== 5. 年龄统计信息 ===");
        IntSummaryStatistics ageStats = people.stream()
                .mapToInt(Person::getAge)
                .summaryStatistics();
        System.out.println("年龄统计: " + ageStats);

        // 6. Weighted average of adult ages; each adult is weighted by the number of adults
        //    in the same country.
        System.out.println("\n=== 6. 计算所有成年人年龄的加权平均（按国家人口权重） ===");
        Map<String, Long> countryCounts = people.stream()
                .filter(p -> p.getAge() >= 18)
                .collect(Collectors.groupingBy(Person::getCountry, Collectors.counting()));

        double weightedAgeSum = people.stream()
                .filter(p -> p.getAge() >= 18)
                .mapToDouble(p -> p.getAge() * countryCounts.get(p.getCountry()))
                .sum();
        // The divisor must be the sum of the weights actually applied (one weight per adult),
        // not the number of adults; otherwise the result can exceed the largest age.
        long totalWeight = people.stream()
                .filter(p -> p.getAge() >= 18)
                .mapToLong(p -> countryCounts.get(p.getCountry()))
                .sum();
        double weightedAvg = weightedAgeSum / totalWeight;
        System.out.println("加权平均年龄: " + String.format("%.2f", weightedAvg));

        // 7. Tracing a pipeline with peek.
        System.out.println("\n=== 7. 使用peek调试流处理过程 ===");
        List<String> debugResult = people.stream()
                .peek(p -> System.out.println("处理: " + p))
                .filter(p -> p.getAge() > 25)
                .peek(p -> System.out.println("年龄>25: " + p))
                .map(Person::getName)
                .peek(name -> System.out.println("提取姓名: " + name))
                .sorted()
                .peek(name -> System.out.println("排序后: " + name))
                .collect(Collectors.toList());
        System.out.println("最终结果: " + debugResult);

        // 8. Composed collector: per-country statistics gathered into a generic map.
        System.out.println("\n=== 8. 按国家收集人员信息（包含统计） ===");
        Map<String, Map<String, Object>> countryStats = people.stream()
                .collect(Collectors.groupingBy(
                        Person::getCountry,
                        Collectors.collectingAndThen(
                                Collectors.toList(),
                                list -> {
                                    Map<String, Object> stats = new HashMap<>();
                                    stats.put("count", list.size());
                                    stats.put("avgAge", list.stream().mapToInt(Person::getAge).average().orElse(0));
                                    stats.put("maxAge", list.stream().mapToInt(Person::getAge).max().orElse(0));
                                    stats.put("minAge", list.stream().mapToInt(Person::getAge).min().orElse(0));
                                    stats.put("names", list.stream().map(Person::getName).collect(Collectors.toList()));
                                    return stats;
                                }
                        )
                ));
        countryStats.forEach((country, stats) -> {
            System.out.println("\n" + country + ":");
            stats.forEach((key, value) -> System.out.println("  " + key + ": " + value));
        });

        // 9. Parallel stream with an artificial delay per element.
        System.out.println("\n=== 9. 并行流处理（模拟耗时操作） ===");
        long startTime = System.currentTimeMillis();
        List<String> parallelResult = people.parallelStream()
                .filter(p -> p.getAge() >= 20)
                .map(p -> {
                    // Simulate some processing time.
                    try {
                        Thread.sleep(10);
                    } catch (InterruptedException e) {
                        // Restore the interrupt flag so callers can still observe it.
                        Thread.currentThread().interrupt();
                    }
                    return p.getName().toUpperCase() + " (" + p.getCountry() + ")";
                })
                .collect(Collectors.toList());
        long endTime = System.currentTimeMillis();
        System.out.println("并行处理结果: " + parallelResult);
        System.out.println("并行处理耗时: " + (endTime - startTime) + "ms");

        // 10. Hand-written collector: age range information.
        System.out.println("\n=== 10. 自定义收集器：收集年龄范围信息 ===");
        AgeRangeInfo ageRangeInfo = people.stream()
                .collect(new AgeRangeCollector());
        System.out.println("年龄范围信息: " + ageRangeInfo);
    }

    /**
     * Collector that folds {@link Person} elements into a single {@link AgeRangeInfo}.
     * The accumulation container is also the result, so the finisher is the identity.
     */
    static class AgeRangeCollector implements java.util.stream.Collector<Person, AgeRangeInfo, AgeRangeInfo> {
        /** @return a factory for empty accumulators */
        @Override
        public java.util.function.Supplier<AgeRangeInfo> supplier() {
            return AgeRangeInfo::new;
        }

        /** @return a function that folds one person into an accumulator */
        @Override
        public java.util.function.BiConsumer<AgeRangeInfo, Person> accumulator() {
            return (info, person) -> info.addPerson(person);
        }

        /** @return a function that merges the second partial result into the first and returns the first */
        @Override
        public java.util.function.BinaryOperator<AgeRangeInfo> combiner() {
            return (info1, info2) -> {
                info1.merge(info2);
                return info1;
            };
        }

        /** @return the identity function; the accumulator already is the result */
        @Override
        public java.util.function.Function<AgeRangeInfo, AgeRangeInfo> finisher() {
            return info -> info;
        }

        /**
         * @return {@code UNORDERED} plus {@code IDENTITY_FINISH}, the latter because
         *         {@link #finisher()} returns its argument unchanged
         */
        @Override
        public Set<Characteristics> characteristics() {
            return Set.of(Characteristics.UNORDERED, Characteristics.IDENTITY_FINISH);
        }
    }

    /** Mutable accumulator tracking minimum, maximum, sum and count of the ages added to it. */
    static class AgeRangeInfo {
        // Sentinels: the first added age replaces both of them.
        private int minAge = Integer.MAX_VALUE;
        private int maxAge = Integer.MIN_VALUE;
        private int totalAge = 0;
        private int count = 0;

        /**
         * Folds one person's age into the running statistics.
         *
         * @param person the person whose age is added
         */
        public void addPerson(Person person) {
            int age = person.getAge();
            minAge = Math.min(minAge, age);
            maxAge = Math.max(maxAge, age);
            totalAge += age;
            count++;
        }

        /**
         * Adds the statistics of another accumulator to this one.
         *
         * @param other the accumulator to merge in; it is not modified
         */
        public void merge(AgeRangeInfo other) {
            minAge = Math.min(minAge, other.minAge);
            maxAge = Math.max(maxAge, other.maxAge);
            totalAge += other.totalAge;
            count += other.count;
        }

        /** @return the mean of all added ages, or {@code 0} when nothing has been added */
        public double getAverageAge() {
            return count > 0 ? (double) totalAge / count : 0;
        }

        /** @return a one-line summary of range, average and count */
        @Override
        public String toString() {
            return String.format("年龄范围: %d-%d, 平均年龄: %.2f, 总人数: %d",
                    minAge, maxAge, getAverageAge(), count);
        }
    }

    /** Immutable value holder describing one person of the sample data. */
    static class Person {
        private final String name;
        private final String country;
        private final int age;
        private final String job;

        /**
         * @param name    the person's name
         * @param country the country code the person is grouped by
         * @param age     the person's age in years
         * @param job     the person's job title
         */
        Person(String name, String country, int age, String job) {
            this.name = name;
            this.country = country;
            this.age = age;
            this.job = job;
        }

        /** @return the person's name */
        public String getName() {
            return name;
        }

        /** @return the person's country code */
        public String getCountry() {
            return country;
        }

        /** @return the person's age in years */
        public int getAge() {
            return age;
        }

        /** @return the person's job title */
        public String getJob() {
            return job;
        }

        /**
         * Renders the person as
         * {@code JavaStreamDebug.Person(name=..., country=..., age=..., job=...)},
         * the layout Lombok's {@code @ToString} generates for a nested class.
         */
        @Override
        public String toString() {
            return "JavaStreamDebug.Person(name=" + name
                    + ", country=" + country
                    + ", age=" + age
                    + ", job=" + job + ")";
        }
    }
}

IDEA Stream 调试工具

这种复杂情况下如果stream流里面藏了bug，就很难通过普通调试方法找到问题了。还好IDEA给我们提供了比较好用的调试工具，能够直接看到数据流的生命周期。

使用步骤

1. 设置断点

例如针对“复杂的分组和聚合操作”这个流操作，我们先打个断点：

2. 启动调试并选择Stream追踪

然后点击调试面板中的三个点：

选择“Trace Current Stream Chain”：

3. 查看Stream处理链

这个功能的意图很清楚了，就是能够直接看到整个stream的处理链：

因为上面只有一个collect操作，所以stream chain中只能看到一个collect操作。

多步操作示例

我们再来看一个有多步操作的场景：过滤未成年人，按国家分组，再按职业分组。

总结

IDEA的Stream调试工具虽然不能完全解决所有调试问题，但确实提供了一种更直观的方式来观察Stream的处理过程。对于复杂的collect操作，我们仍然需要通过结果来判断是否正确，但至少能够清楚地看到数据在Stream中的流转过程。