Streams and Collectors in Java
Question
Explain the Stream API and Collectors in Java. How do you create and use streams? What are the common stream operations and collectors? What are the best practices for working with streams?
Answer
The Stream API in Java provides a powerful way to process collections of data in a functional style. Streams enable declarative processing of data with operations like filtering, mapping, and reducing. Collectors provide a way to accumulate stream elements into various data structures.
Basic Stream Operations
- Creating Streams
/**
 * Shows the usual ways of obtaining a {@link Stream}: from a collection,
 * from an array, from explicit values, and from generator functions.
 */
public class StreamCreation {

    /**
     * Builds one stream per kind of source. No terminal operation is invoked,
     * so none of the pipelines (including the two unbounded ones) is ever
     * traversed.
     */
    public static void createStreams() {
        // Source: an existing collection.
        List<String> letters = Arrays.asList("a", "b", "c");
        Stream<String> fromCollection = letters.stream();

        // Source: an array.
        String[] letterArray = {"a", "b", "c"};
        Stream<String> fromArray = Arrays.stream(letterArray);

        // Source: values listed inline.
        Stream<String> fromValues = Stream.of("a", "b", "c");

        // Unbounded sequence 0, 1, 2, ... produced by repeatedly applying the function.
        Stream<Integer> countingUp = Stream.iterate(0, previous -> previous + 1);

        // Unbounded sequence in which every element comes from a fresh Math.random() call.
        Stream<Double> randomValues = Stream.generate(Math::random);
    }
}
- Basic Stream Operations
/**
 * Demonstrates the four most common single-step pipelines: filter, map,
 * sorted, and distinct.
 */
public class BasicOperations {

    /**
     * Runs four independent pipelines over {@code words}; each one collects
     * its result into a new list that is local to this method.
     *
     * @param words the strings to process; each pipeline opens its own stream on it
     */
    public static void processStream(List<String> words) {
        // Filter: keep only words with more than five characters.
        List<String> longerThanFive = words.stream()
                .filter(candidate -> 5 < candidate.length())
                .collect(Collectors.toList());

        // Map: replace every word by its length.
        List<Integer> lengths = words.stream()
                .map(candidate -> candidate.length())
                .collect(Collectors.toList());

        // Sort: natural ordering of String.
        List<String> inNaturalOrder = words.stream()
                .sorted()
                .collect(Collectors.toList());

        // Distinct: drop duplicates (as decided by equals).
        List<String> withoutDuplicates = words.stream()
                .distinct()
                .collect(Collectors.toList());
    }
}
Intermediate Operations
- Filtering and Mapping
public class IntermediateOperations { public static void processUsers(List<User> users) { // Filter and map List<String> activeUserEmails = users.stream() .filter(User::isActive) .map(User::getEmail) .collect(Collectors.toList()); // Multiple operations List<String> processedNames = users.stream() .filter(user -> user.getAge() >= 18) .map(User::getName) .map(String::toUpperCase) .collect(Collectors.toList()); } }
- FlatMap Operation
public class FlatMapExample { public static void processOrders(List<Order> orders) { // Get all items from all orders List<Item> allItems = orders.stream() .flatMap(order -> order.getItems().stream()) .collect(Collectors.toList()); // Count unique items long uniqueItemCount = orders.stream() .flatMap(order -> order.getItems().stream()) .distinct() .count(); } }
Terminal Operations
- Collecting Results
public class TerminalOperations { public static void collectResults(List<Product> products) { // Collect to List List<Product> expensiveProducts = products.stream() .filter(p -> p.getPrice() > 100) .collect(Collectors.toList()); // Collect to Set Set<String> categories = products.stream() .map(Product::getCategory) .collect(Collectors.toSet()); // Collect to Map Map<String, Product> productMap = products.stream() .collect(Collectors.toMap( Product::getId, Function.identity() )); } }
- Reduction Operations
/**
 * Demonstrates reducing a stream to a single value: a sum, a maximum, and a
 * concatenated string.
 */
public class ReductionOperations {

    /**
     * Runs three independent reductions over {@code numbers}. All results
     * stay local to the method.
     *
     * @param numbers the values to reduce; may be empty
     */
    public static void reduceStream(List<Integer> numbers) {
        // Sum: identity 0 means an empty list yields 0.
        int sum = numbers.stream()
                .reduce(0, Integer::sum);

        // Maximum: no identity is given, so the result is empty for an empty list.
        Optional<Integer> max = numbers.stream()
                .reduce(Integer::max);

        // Custom reduction: accumulate into one StringBuilder (mutable reduction).
        // reduce("", String::concat) would copy the whole partial result on every
        // step, which is quadratic in the total output length.
        String concatenated = numbers.stream()
                .map(String::valueOf)
                .collect(StringBuilder::new, StringBuilder::append, StringBuilder::append)
                .toString();
    }
}
Custom Collectors
- Creating Custom Collectors
public class CustomCollectors { public static Collector<Product, ?, Map<String, List<Product>>> byCategoryCollector() { return Collector.of( HashMap::new, (map, product) -> { map.computeIfAbsent(product.getCategory(), k -> new ArrayList<>()) .add(product); }, (map1, map2) -> { map2.forEach((category, products) -> map1.computeIfAbsent(category, k -> new ArrayList<>()) .addAll(products)); return map1; } ); } // Usage public static void collectByCategory(List<Product> products) { Map<String, List<Product>> productsByCategory = products.stream() .collect(byCategoryCollector()); } }
Parallel Streams
- Using Parallel Streams
public class ParallelStreams { public static void processInParallel(List<Data> data) { // Parallel processing List<Result> results = data.parallelStream() .map(Data::process) .collect(Collectors.toList()); // With custom thread pool ForkJoinPool customThreadPool = new ForkJoinPool(4); List<Result> customResults = customThreadPool.submit(() -> data.parallelStream() .map(Data::process) .collect(Collectors.toList()) ).join(); } }
Best Practices
- Stream Performance
/**
 * Contrasts running the same filter in two separate pipelines with running
 * it once and reusing the collected result.
 */
public class StreamPerformance {

    /**
     * Bad: the same filter is evaluated in two pipelines, one ending in
     * {@code count()} and one ending in {@code collect}.
     *
     * @param words the words to filter
     */
    public static void badPerformance(List<String> words) {
        long count = words.stream()
                .filter(word -> 5 < word.length())
                .count();

        List<String> filtered = words.stream()
                .filter(word -> 5 < word.length())
                .collect(Collectors.toList());
    }

    /**
     * Good: the filter runs in a single pipeline and the count is read from
     * the size of the collected list.
     *
     * @param words the words to filter
     */
    public static void goodPerformance(List<String> words) {
        List<String> filtered = words.stream()
                .filter(word -> 5 < word.length())
                .collect(Collectors.toList());

        long count = filtered.size();
    }
}
- Stream State
/**
 * Contrasts summing through a mutable holder updated from a lambda with
 * summing through a reduction.
 */
public class StreamState {

    /**
     * Bad: accumulates into a one-element array that the {@code forEach}
     * lambda mutates. The total stays local to the method.
     *
     * @param numbers the values to add up
     */
    public static void badState(List<Integer> numbers) {
        // An array is used because a lambda cannot assign to a captured local.
        final int[] runningTotal = new int[1];
        numbers.stream()
                .forEach(value -> runningTotal[0] = runningTotal[0] + value);
    }

    /**
     * Good: lets the stream perform the summation, with no state outside the
     * pipeline.
     *
     * @param numbers the values to add up
     * @return the {@code int} sum of all elements, or 0 for an empty list
     */
    public static int goodState(List<Integer> numbers) {
        int total = numbers.stream()
                .mapToInt(Integer::intValue)
                .sum();
        return total;
    }
}
Common Use Cases
- Data Aggregation
/**
 * Demonstrates {@code groupingBy} combined with a downstream collector.
 */
public class DataAggregation {

    /**
     * Sums {@code getAmount()} per {@code getCategory()} over all orders.
     *
     * @param orders the orders to aggregate
     * @return a map from category to the summed amount of its orders
     */
    public static Map<String, Double> aggregateSales(List<Order> orders) {
        Map<String, Double> totalByCategory = orders.stream()
                .collect(Collectors.groupingBy(
                        (Order order) -> order.getCategory(),
                        Collectors.summingDouble((Order order) -> order.getAmount())));
        return totalByCategory;
    }

    /**
     * Counts the products in each {@code getCategory()}.
     *
     * @param products the products to count
     * @return a map from category to the number of products in it
     */
    public static Map<String, Long> countByCategory(List<Product> products) {
        Map<String, Long> countPerCategory = products.stream()
                .collect(Collectors.groupingBy(
                        (Product product) -> product.getCategory(),
                        Collectors.counting()));
        return countPerCategory;
    }
}
- Data Transformation
/**
 * Demonstrates mapping one object type onto another inside a pipeline.
 */
public class DataTransformation {

    /**
     * Converts every entity into a {@code DTO}, preserving encounter order.
     *
     * @param entities the entities to convert
     * @return a list with one DTO per entity
     */
    public static List<DTO> transformToDTO(List<Entity> entities) {
        return entities.stream()
                .map(DataTransformation::toDto)
                .collect(Collectors.toList());
    }

    /** Builds a DTO from the id, name, and status of a single entity. */
    private static DTO toDto(Entity entity) {
        return new DTO(entity.getId(), entity.getName(), entity.getStatus());
    }
}
Testing
- Testing Stream Operations
@Test public void testStreamOperations() { List<String> words = Arrays.asList("hello", "world", "java"); // Test filter List<String> longWords = words.stream() .filter(w -> w.length() > 4) .collect(Collectors.toList()); assertEquals(Arrays.asList("hello", "world"), longWords); // Test map List<Integer> lengths = words.stream() .map(String::length) .collect(Collectors.toList()); assertEquals(Arrays.asList(5, 5, 4), lengths); }
Common Pitfalls
- Stream Reuse
/**
 * Contrasts operating twice on one stream object with opening a fresh stream
 * for each pipeline.
 */
public class StreamReuse {

    /**
     * Bad: attaches a second operation to a stream object that already had
     * one attached, which fails with {@link IllegalStateException}.
     *
     * @param words the words the stream is opened on
     */
    public static void badReuse(List<String> words) {
        Stream<String> shared = words.stream();
        // First operation: links the stream object; the returned stream is discarded.
        shared.filter(word -> 5 < word.length());
        // Second operation on the same, already linked object -> IllegalStateException.
        shared.map(word -> word.toUpperCase());
    }

    /**
     * Good: each pipeline starts from its own stream; the second one streams
     * over the list collected by the first.
     *
     * @param words the words to filter and then upper-case
     */
    public static void goodReuse(List<String> words) {
        List<String> longerThanFive = words.stream()
                .filter(word -> 5 < word.length())
                .collect(Collectors.toList());

        List<String> upperCased = longerThanFive.stream()
                .map(word -> word.toUpperCase())
                .collect(Collectors.toList());
    }
}
- Null Handling
/**
 * Contrasts a pipeline that dereferences every element with one that drops
 * {@code null} elements first.
 */
public class NullHandling {

    /**
     * Bad: upper-cases and prints every element without a null check, so a
     * {@code null} element raises {@link NullPointerException}.
     *
     * @param words the words to print in upper case
     */
    public static void badNullHandling(List<String> words) {
        words.stream()
                .map(word -> word.toUpperCase())
                .forEach(System.out::println);
    }

    /**
     * Good: removes {@code null} elements before upper-casing and printing.
     * Only the elements are filtered; the list reference itself is not checked.
     *
     * @param words the words to print in upper case; may contain nulls
     */
    public static void goodNullHandling(List<String> words) {
        words.stream()
                .filter(word -> word != null)
                .map(word -> word.toUpperCase())
                .forEach(System.out::println);
    }
}