Master the power of sets for automation testing and data manipulation
A set in Python is a built-in data type that represents an unordered collection of unique elements. Think of it as a mathematical set - no duplicates allowed, and order doesn't matter.
# Creating sets
empty_set = set()  # Note: {} creates a dict, not a set
numbers = {1, 2, 3, 4, 5}
languages = {"Python", "Java", "JavaScript", "Python"}  # Duplicate removed
print(languages)  # Output: {'Python', 'Java', 'JavaScript'}

# From other iterables
list_to_set = set([1, 2, 2, 3, 3, 4])
print(list_to_set)  # Output: {1, 2, 3, 4}
Combines all unique elements from both sets
set1 = {1, 2, 3}
set2 = {3, 4, 5}

# Method 1: Using operator
result = set1 | set2
print(result)  # {1, 2, 3, 4, 5}

# Method 2: Using method
result = set1.union(set2)
print(result)  # {1, 2, 3, 4, 5}
Returns elements common to both sets
passed_tests = {"test1", "test2", "test3"}
failed_tests = {"test2", "test4", "test5"}

# Common tests that both passed and failed?
# (This shouldn't happen in real scenarios)
common = passed_tests & failed_tests
print(common)  # {"test2"}

# Using method
common = passed_tests.intersection(failed_tests)
Elements in first set but not in second
all_tests = {"test1", "test2", "test3", "test4"}
executed_tests = {"test1", "test3"}

# Which tests were not executed?
not_executed = all_tests - executed_tests
print(not_executed)  # {"test2", "test4"}

# Using method
not_executed = all_tests.difference(executed_tests)
Elements in either set but not in both
expected_users = {"user1", "user2", "user3"}
actual_users = {"user2", "user3", "user4"}

# Users that are either missing or extra
discrepancy = expected_users ^ actual_users
print(discrepancy)  # {"user1", "user4"}

# Using method
discrepancy = expected_users.symmetric_difference(actual_users)
# Subset and Superset checks
required_permissions = {"read", "write"}
user_permissions = {"read", "write", "execute", "delete"}

# Check if user has all required permissions
has_all_required = required_permissions.issubset(user_permissions)
print(f"Has all required permissions: {has_all_required}")  # True

# Check if user has more than required
has_more = user_permissions.issuperset(required_permissions)
print(f"Has more than required: {has_more}")  # True

# Check if sets are disjoint (no common elements)
admin_actions = {"create_user", "delete_user"}
guest_actions = {"view_profile", "edit_profile"}
are_disjoint = admin_actions.isdisjoint(guest_actions)
print(f"No common actions: {are_disjoint}")  # True
| Operation | Time Complexity | Use Case |
|---|---|---|
| add(element) | O(1) average | Adding test results |
| remove(element) | O(1) average | Removing completed tests |
| element in set | O(1) average | Checking if test exists |
| Union (\|) | O(len(s1) + len(s2)) | Combining test suites |
| Intersection (&) | O(min(len(s1), len(s2))) | Finding common failures |
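To make the table above concrete, here is a minimal sketch (not part of the original examples; the data and variable names are made up for illustration) that uses the standard-library timeit module to compare membership checks against a list and a set. Exact timings depend on your machine, but the set lookup should be orders of magnitude faster for large collections.

import timeit

# Hypothetical pool of test case IDs, used only for this benchmark
test_ids_list = [f"TC{i:05d}" for i in range(100_000)]
test_ids_set = set(test_ids_list)

# Membership check: O(n) scan for the list, O(1) average hash lookup for the set
list_time = timeit.timeit(lambda: "TC99999" in test_ids_list, number=1000)
set_time = timeit.timeit(lambda: "TC99999" in test_ids_set, number=1000)

print(f"list lookup: {list_time:.4f}s for 1000 checks")
print(f"set lookup:  {set_time:.6f}s for 1000 checks")

The lookup target is deliberately the last ID generated, so each list check scans the entire list, while the set finds it by hash in roughly constant time.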
# Remove duplicate test case IDs from a list
test_cases = ["TC001", "TC002", "TC001", "TC003", "TC002", "TC004"]
unique_tests = list(set(test_cases))
print(f"Original: {len(test_cases)}, Unique: {len(unique_tests)}")
# Output: Original: 6, Unique: 4
def compare_test_results(expected_pass, actual_pass):
    expected = set(expected_pass)
    actual = set(actual_pass)

    # Tests that should pass but failed
    unexpected_failures = expected - actual

    # Tests that passed but weren't expected to
    unexpected_passes = actual - expected

    # Tests that passed as expected
    expected_passes = expected & actual

    return {
        'unexpected_failures': unexpected_failures,
        'unexpected_passes': unexpected_passes,
        'expected_passes': expected_passes
    }

# Example usage
expected = ["test_login", "test_logout", "test_profile"]
actual = ["test_login", "test_profile", "test_admin"]

results = compare_test_results(expected, actual)
print("Unexpected failures:", results['unexpected_failures'])
print("Unexpected passes:", results['unexpected_passes'])
def validate_unique_users(user_list):
    """Ensure all user IDs are unique"""
    user_ids = [user['id'] for user in user_list]
    unique_ids = set(user_ids)

    if len(user_ids) != len(unique_ids):
        duplicates = [uid for uid in user_ids if user_ids.count(uid) > 1]
        return False, f"Duplicate IDs found: {set(duplicates)}"

    return True, "All user IDs are unique"

# Test data
users = [
    {'id': 'user1', 'name': 'Alice'},
    {'id': 'user2', 'name': 'Bob'},
    {'id': 'user1', 'name': 'Charlie'}  # Duplicate ID
]

is_valid, message = validate_unique_users(users)
print(f"Validation result: {message}")
def analyze_automation_logs(expected_events, actual_events):
    """Compare expected vs actual events in automation logs"""
    expected_set = set(expected_events)
    actual_set = set(actual_events)

    missing_events = expected_set - actual_set
    extra_events = actual_set - expected_set
    occurred_events = expected_set & actual_set

    print(f"ā Expected events occurred: {len(occurred_events)}")
    print(f"ā Missing events: {missing_events}")
    print(f"ā ļø Unexpected events: {extra_events}")

    return len(missing_events) == 0 and len(extra_events) == 0

# Example log analysis
expected_events = [
    "user_login", "page_load", "data_fetch", "user_logout"
]
actual_events = [
    "user_login", "page_load", "error_occurred", "user_logout"
]

is_successful = analyze_automation_logs(expected_events, actual_events)
Answer:
# Mutable set
test_cases = {"TC001", "TC002"}
test_cases.add("TC003")  # Works fine

# Immutable frozenset
frozen_cases = frozenset(["TC001", "TC002"])
# frozen_cases.add("TC003")  # This would raise AttributeError

# frozenset as dict key
test_results = {frozen_cases: "passed"}
Answer:
# Method 1: Simple conversion (loses order)
test_ids = ["TC001", "TC002", "TC001", "TC003", "TC002"]
unique_ids = list(set(test_ids))

# Method 2: Preserve order using dict.fromkeys() (Python 3.7+)
unique_ordered = list(dict.fromkeys(test_ids))

# Method 3: Manual approach preserving order
def remove_duplicates_preserve_order(items):
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result

unique_ordered_manual = remove_duplicates_preserve_order(test_ids)
Answer: Use the difference operation (-) or difference() method
executed_tests = {"test_login", "test_search", "test_checkout"}
passed_tests = {"test_login", "test_checkout"}

# Tests that were executed but failed
failed_tests = executed_tests - passed_tests
# or
failed_tests = executed_tests.difference(passed_tests)

print(failed_tests)  # {"test_search"}
Answer: Create a comprehensive log validator using set operations
class LogValidator:
    def __init__(self, required_events, forbidden_events=None):
        self.required_events = set(required_events)
        self.forbidden_events = set(forbidden_events or [])

    def validate(self, actual_logs):
        actual_events = set(actual_logs)

        # Check for missing required events
        missing = self.required_events - actual_events

        # Check for forbidden events
        forbidden_found = actual_events & self.forbidden_events

        # Check for unexpected events (not required or forbidden)
        all_expected = self.required_events | self.forbidden_events
        unexpected = actual_events - all_expected

        is_valid = not missing and not forbidden_found

        return {
            'valid': is_valid,
            'missing_required': missing,
            'forbidden_found': forbidden_found,
            'unexpected_events': unexpected
        }

# Usage example
validator = LogValidator(
    required_events=["start_test", "login_success", "end_test"],
    forbidden_events=["error", "timeout"]
)

log_events = ["start_test", "login_success", "data_loaded", "end_test"]
result = validator.validate(log_events)
print(f"Validation passed: {result['valid']}")
class TestSuiteComparator:
    def __init__(self):
        self.comparison_results = {}

    def compare_suites(self, suite_a, suite_b, suite_a_name="Suite A", suite_b_name="Suite B"):
        set_a = set(suite_a)
        set_b = set(suite_b)

        # All operations in one analysis
        union = set_a | set_b           # All unique tests
        intersection = set_a & set_b    # Common tests
        only_in_a = set_a - set_b       # Tests only in suite A
        only_in_b = set_b - set_a       # Tests only in suite B
        symmetric_diff = set_a ^ set_b  # Tests in either but not both

        self.comparison_results = {
            'total_unique_tests': len(union),
            'common_tests': len(intersection),
            'common_test_list': intersection,
            f'only_in_{suite_a_name.lower().replace(" ", "_")}': only_in_a,
            f'only_in_{suite_b_name.lower().replace(" ", "_")}': only_in_b,
            'different_tests': symmetric_diff,
            'similarity_percentage': (len(intersection) / len(union)) * 100 if union else 0
        }

        return self.comparison_results

    def generate_report(self):
        if not self.comparison_results:
            return "No comparison data available"

        report = []
        report.append("=" * 50)
        report.append("TEST SUITE COMPARISON REPORT")
        report.append("=" * 50)
        report.append(f"Total unique tests across both suites: {self.comparison_results['total_unique_tests']}")
        report.append(f"Common tests: {self.comparison_results['common_tests']}")
        report.append(f"Similarity: {self.comparison_results['similarity_percentage']:.1f}%")
        report.append("")

        for key, value in self.comparison_results.items():
            if key.startswith('only_in_') and value:
                suite_name = key.replace('only_in_', '').replace('_', ' ').title()
                report.append(f"Tests only in {suite_name}: {value}")

        return "\n".join(report)

# Example usage
regression_tests = [
    "test_user_login", "test_user_logout", "test_password_reset",
    "test_profile_update", "test_data_export"
]

smoke_tests = [
    "test_user_login", "test_basic_navigation",
    "test_home_page_load", "test_user_logout"
]

comparator = TestSuiteComparator()
results = comparator.compare_suites(regression_tests, smoke_tests, "Regression", "Smoke")
print(comparator.generate_report())
class TestExecutionMonitor:
    def __init__(self, expected_tests):
        self.expected_tests = set(expected_tests)
        self.started_tests = set()
        self.completed_tests = set()
        self.failed_tests = set()
        self.skipped_tests = set()

    def start_test(self, test_name):
        if test_name in self.expected_tests:
            self.started_tests.add(test_name)
            print(f"š Started: {test_name}")
        else:
            print(f"ā ļø Unexpected test started: {test_name}")

    def complete_test(self, test_name, status="passed"):
        if test_name in self.started_tests:
            self.completed_tests.add(test_name)
            if status == "failed":
                self.failed_tests.add(test_name)
            print(f"ā Completed: {test_name} - {status}")
        else:
            print(f"ā Test completed without being started: {test_name}")

    def skip_test(self, test_name):
        self.skipped_tests.add(test_name)
        print(f"āļø Skipped: {test_name}")

    def get_status_report(self):
        not_started = self.expected_tests - self.started_tests - self.skipped_tests
        in_progress = self.started_tests - self.completed_tests
        passed_tests = self.completed_tests - self.failed_tests

        return {
            'total_expected': len(self.expected_tests),
            'not_started': not_started,
            'in_progress': in_progress,
            'completed': len(self.completed_tests),
            'passed': len(passed_tests),
            'failed': len(self.failed_tests),
            'skipped': len(self.skipped_tests),
            'completion_rate': (len(self.completed_tests) / len(self.expected_tests)) * 100
        }

# Example usage
expected_tests = [
    "test_login", "test_navigation", "test_search", "test_purchase", "test_logout"
]

monitor = TestExecutionMonitor(expected_tests)

# Simulate test execution
monitor.start_test("test_login")
monitor.complete_test("test_login", "passed")
monitor.start_test("test_navigation")
monitor.complete_test("test_navigation", "passed")
monitor.skip_test("test_search")
monitor.start_test("test_purchase")
monitor.complete_test("test_purchase", "failed")

# Get status report
status = monitor.get_status_report()
print(f"\nš Execution Status:")
print(f"Completion Rate: {status['completion_rate']:.1f}%")
print(f"Not Started: {status['not_started']}")
print(f"In Progress: {status['in_progress']}")
print(f"Failed: {status['failed']}")
print(f"Skipped: {status['skipped']}")
Python sets are incredibly powerful for handling unique data and performing efficient comparisons. In automation testing and QA, they provide O(1) average-time membership checks, automatic deduplication of test data, and concise union, intersection, and difference operations for comparing expected results against actual results and validating logs.
Mastering set operations is essential for any QA automation engineer. They provide elegant solutions to common problems, and using them well demonstrates your understanding of efficient data structures in interviews!
"In automation testing, the ability to efficiently compare, validate, and process unique data sets is what separates good testers from great ones. Sets are your secret weapon!" šÆ
You've mastered Python set operations for automation testing