Note: The implementation uses Python 3.11
Introduction
Coding exercises
Find a line with max number of points
Problem: Given n points on a 2-dimension plane, find the maximum number of points that lie on the same line.
Solution: From the mathematics standpoint, N points with coordinates (x, y) are aligned if slope dy/dx between each of these N points are the same (i.e. (1, 1), (3, 3) and (6, 6) belongs to line y = 3.x). This implementation uses recursion. We define a data class Point with basic == and slope computation methods. Time complexity O(n).
Implementation:
Solution: From the mathematics standpoint, N points with coordinates (x, y) are aligned if slope dy/dx between each of these N points are the same (i.e. (1, 1), (3, 3) and (6, 6) belongs to line y = 3.x). This implementation uses recursion. We define a data class Point with basic == and slope computation methods. Time complexity O(n).
Implementation:
from dataclasses import dataclass
from typing import TypeVar, List, Set, Dict
Point = TypeVar("Point")
@dataclass
class Point(object):
"""
Convenient class wrapper to implement the == operator and computation of the derivative
"""
x: int
y: int
# Override == operator for this class
def __eq__(self, other: Point) -> bool:
return self.x == other.x and self.y == other.y
# Compute the derivative/sloped between any two points
def slope(self, other: Point) -> float:
if self == other: # To safe guard against self
return -1.0
elif self.x == other.x: # To avoid divide by zero
return 1e+8
else: # Otherwise compute the derivative
return float(self.y - other.y)/(self.x - other.x)
def max_num_points(points: List[Point]) -> List[Point]:
"""
Compute the number of points which belongs to the same line (same slope) using recursion
"""
# Execute recursion along the list of points using list index
def align(index: int, visited: Dict[float, List[Point]]) -> Dict[float, List[Point]]:
if index >= len(points) - 1:
return visited
else:
for pt in points:
# Make sure we exclude a data point already visited
if not points[index] == pt:
this_slope = points[index].slope(pt)
# Update or create
new_visited: List[Point] = visited[this_slope] if this_slope in visited else []
# Avoid duplicate list of points associated with
if pt not in new_visited:
new_visited.append(pt)
# Update the dictionary { slope: list of points with same slope }
visited[this_slope] = new_visited
return align(index+1, visited)
if len(points) == 0:
return []
else:
all_slopes_dict = align(0, {})
# Finally extract the list of points associated with
# a slope with maximum number of points.
max_num_pts = -1
output_points = None
for slope, points in all_slopes_dict.items():
if max_num_pts < len(points):
max_num_pts = len(points)
output_points = points
return output_points
input_points = [
Point(1, 1), Point(2, 4), Point(3, 3), Point(4, 1), Point(5, 11), Point(6, 6), Point(8, 13)
]
print(str(max_num_points(input_points)))
# [Point(x=3, y=3), Point(x=6, y=6), Point(x=1, y=1)]
Optimize coins distribution
Problem: Find the minimum set of coins for a set of coin_types (1 cent, 5 cents ... 10 cents) required to foot a given bill (meet a target_amount). For example 168 = 100 cents * 1 + 50 cents * 1 + 10 cents * 2 + 5 cent * 1 + 1 cent * 3.
Solution: Use recursion, minimizing the number of coins needs to reach the needed amount at each step. The recursion exits when either all the various type of coins have been used or the target amount has been finally reached. The time complexity is O(n)
Implementation:
Solution: Use recursion, minimizing the number of coins needs to reach the needed amount at each step. The recursion exits when either all the various type of coins have been used or the target amount has been finally reached. The time complexity is O(n)
class CoinsDistribution(object):
"""
Find the optimal distribution of set of coins (1 cent, 5 cent, 10 cent,..)
to foot a given bill (i.e. 127 cents = 100 cents*1 + 25 cents*1 + 1 cent* 2
Recursion on the remaining amount to close once a coin if found
"""
def __init__(self, coins: []):
# We do not assume the coins are actually sorted
self.coin_types = sorted(coins, reverse=True)
def find(self, target_amount: int) -> List[int]:
def _find(left_over_amount: int, index: int, coin_distribution: List[int]) -> List[int]:
# Recursion exit condition (amount reached or no more coin left)
if left_over_amount == 0 or index >= len(self.coin_types):
return coin_distribution
else:
remaining_amount = left_over_amount
# Attempt to assign as many coins for this category of coins
while remaining_amount >= self.coin_types[index]:
remaining_amount -= self.coin_types[index]
coin_distribution[index] += 1
# Move to the next type of coin
return _find(remaining_amount, index+1, coin_distribution)
return _find(target_amount, 0, [0] * len(self.coin_types))
coin_types = [1, 5, 10, 25, 50, 100]
target_amount = 376
coins_distribution = CoinsDistribution(coin_types)
distribution = coins_distribution.find(target_amount)
acc = [f'{distribution}*{coins_distribution.coin_types[index]}'
for index, distribution in enumerate(distribution) if distribution > 0]
print(' + '.join(acc)) # 3*100 + 1*50 + 1*25 + 1*1
Test if a list has a cycle
Problem: Find if a list, input_values has a cycle. The problem is equivalent to finding the first duplicate in a list.Solution: Use two iterators: the second iterator advancing twice as fast the first one. Time complexity O(n).
def has_cycle(input_values: List[int]) -> bool:
""" Test if a list has a duplicate or a cycle, using two iterator """
iter1 = iter(input_values)
iter2 = iter(input_values)
try:
while True:
value1 = next(iter1)
next(iter2) # Iter2 advances two elements per iteration
value2 = next(iter2)
if value1 == value2:
return True # Find & exit
except StopIteration as e:
return False
Test:
values1 = [2, 4, 6, 3, 11, 7, 9, 14, 17]
values2 = [2, 4, 6, 3, 11, 6, 9, 14, 17]
print(has_cycle(values1)) # False
print(has_cycle(values2)) # True
Intersection of sorted arrays
Solution: Map the two lists of integer n to lists of tuples (n, list_index) and push the tuples into a priority queue using heaps module. Record two consecutive items, from two different list popped from the priority queue having the same value. The time complexity is O(long + n) ~ O(n)
Implementation:
def intersect_sorted_lists(list1: List[int], list2: List[int]) -> List[int]:
"""
Extract the intersection of two sorted list of different size using
a priority queue and recursion
"""
# Taken care of the basic case
if list1[-1] < list2[0] or list1[0] > list2[-1]:
return []
else:
# Otherwise convert integers, n into tuple (n, list_index)
import heapq
pqueue = []
[heapq.heappush(pqueue, (item, 0)) for item in list1]
[heapq.heappush(pqueue, (item, 1)) for item in list2]
# Recursively popping up tuples from the priority queue
def intersect(new_tuple: Tuple, tuples_list: List[Tuple]) -> List[Tuple]:
if len(pqueue) == 0: # Our recursion exit condition
return tuples_list
else:
# Next tuple in the priority queue
item, index = heapq.heappop(pqueue)
# If two consecutive integers from different list have the same value...
if new_tuple[1] == item and index != new_tuple[0]:
tuples_list.append(new_tuple)
return intersect((index, item), tuples_list)
first_item, first_index = heapq.heappop(pqueue)
intersect_tuples = intersect((first_item, first_index), [])
return [item for _, item in intersect_tuples]
values1 = [0, 2, 8, 10, 12, 34, 46, 48, 54, 99]
values2 = [3, 8, 6, 12, 14, 15, 18, 19, 22, 40, 41, 44, 45, 46, 50, 53]
print(intersect_sorted_lists(values1, values2)) # [8, 12, 46]
Sequence of consecutive integers with highest sum
Solution: Implements an efficient tail recursion to generate a tuple (sum of the sequence, starting index of the sequence).
def extract_seq_max_sum(input_list: List[int], num_items: int) -> (int, int): """ Extract the sequence of consecutive integers with the maximum summation """ def _extract_seq_max_sum( input_list: List[int], num_items: int, max_sum: int, start_index: int, cnt: int) -> (int, int): # Condition to exit the recursion if len(input_list) < num_items: return max_sum, start_index new_sum = sum(input_list[:num_items]) new_max_sum, new_start_index = \
\(new_sum, cnt) if new_sum > max_sum \
else (max_sum, start_index)
return _extract_seq_max_sum(input_list[1:], num_items, new_max_sum, new_start_index, cnt + 1)
return _extract_seq_max_sum(input_list, num_items, 0, 0, 0)
values = [4, 2, 8, 6, 12, 34, 6, 8, 4, 9, 11, 2, 17, 22, 5, 8, 6, 1, 4, 13, 19]
print(extract_seq_max_sum(values, 3)) # 52, 3 [6, 12, 34]
print(extract_seq_max_sum(values, 5)) # 66, 6 [8, 6, 12, 34, 6]
Longest sequence of increasing values
Solution: Implements a recursion longest_increasing_seq over the input list of integers by tracking the sequence contains the current value and comparing with the current longest sequence of increasing value. The recursion walks through the input values using an iterator.
Implementation:
def longest_increasing_seq(input_values: List[int]) -> List[int]:
""" Extract the longest increasing sequence of integer from a list using recursion."""
def longest_increasing_seq(
input_values_iter,
cur_increasing_seq: List[int],
longest_seq: List[int]) -> List[int]:
while True:
try:
# Next element in the list
next_element = next(input_values_iter)
# If current increasing list empty or new element > last element
# add the new element in the current list and continue
if len(cur_increasing_seq) == 0 or next_element > cur_increasing_seq[-1]:
cur_increasing_seq.append(next_element)
# Otherwise complete the current increasing sequence and
# update the longest list if necessary
else:
new_longest_seq = cur_increasing_seq.copy() \
if len(cur_increasing_seq) > len(longest_seq) \
else longest_seq
# Re-initialize the current increasing list
cur_increasing_seq.clear()
cur_increasing_seq.append(next_element)
# Invoke the next recursion
return longest_increasing_seq(input_values_iter, cur_increasing_seq, new_longest_seq)
except StopIteration as e: # Exit point for recursion
return longest_seq
return longest_increasing_seq(iter(input_values), [], [])
Test:
values = [6, 1, 4, 9, 11, 22, 17, 8, 6, 1, 4, 13, 19]
print(str(longest_increasing_seq(values))) # 1, 4, 9, 11, 22
List of items which value equals its index
Solution: Simple traversal with a time complexity O(n).
Implementation:
def get_values_eq_index(input_list: List[int]) -> List[int]:
"""
Retrieve the list of element for which the value is equal to its index in the list
"""
match len(input_list):
case 0:
return []
case 1:
return input_list
case _:
return [el for index, el in enumerate(input_list) if index == el]
Test:
input_list = [2, 9, 2, 5, 4, 8, 1, 5, 8, 10]
print(get_values_eq_index(input_list)) # 8
Find first duplicate in a list
Problem: Find the first duplicate in a string.Solution: Use a set to keep track of visited nodes. Worst case time complexity O(n). This is an example for which recursion is not warranted.
def get_first_duplicate(input_str: AnyStr) -> Optional[AnyStr]:
"""Get the first char duplicate in a string if found, None otherwise"""
match len(input_str):
case 0: return None
case 1: return input_str[0] # Return the only element
case _: # After dealing with the first special cases
unique_set = set()
for ch in input_str:
if ch in unique_set:
return ch
unique_set.add(ch)
return None
input_str1 = "helo"
input_str2 = "hello"
print(get_first_duplicate(input_str1)) # None
print(get_first_duplicate(input_str2)) # 'l'
Check if a binary tree is balanced
Problem: Test whether a binary tree defined its node, Node, is balanced. Every path between the root and any leaf should have the same number of nodes.Solution: Apply an in-order-traversal to collect the number of nodes of all the various lineages, root to leaves then compute the difference of longest and shortest lineages. The traversal of the tree, method, __walk uses recursion. Time complexity O(n).
class Node(object):
""" Basic binary tree node """
def __init__(self, data: int):
self.left = None
self.right = None
self.data = data
def add(self, data: int) -> NoReturn:
"""
Simple insertion to a binary tree, appending a new node if necessary
"""
if data < self.data:
if self.left:
self.left.add(data)
else:
self.left = BinTreeNode(data)
elif data > self.data:
if self.right:
self.right.add(data)
else:
self.right = BinTreeNode(data)
else:
self.data = data
def is_balanced(self) -> bool:
path_acc = []
self.__walk('', path_acc)
if path_acc:
# Measure the length of the various paths from root to leaves
path_sizes = [len(x) for x in path_acc if x != '']
return max(path_sizes) - min(path_sizes) < 1
else:
return True
# --------- Private helper method ---------
def __walk(self, path: AnyStr, lineages: List[AnyStr]):
""" DFS walk """
if self.left: # This node has a left child
self.left.__walk(f'{path}L', lineages)
lineages.append(path)
if self.right: # This node has a right childe
self.right.__walk(f'{path}R', lineages)
lineages.append(path)
Test:
root = Node(1)
values = [2, 9, 8, 10, 13, 7]
[root.add(n) for n in values]
print(root.is_balanced()) # False
root = Node(10)
values = [6, 14, 4, 8, 12, 16]
[root.add(n) for n in values]
print(root.is_balanced()) # True
References
He has been director of data engineering at Aideo Technologies since 2017 and he is the author of "Scala for Machine Learning" Packt Publishing ISBN 978-1-78712-238-3