#!/usr/bin/env python3 import openpyxl # Load both files wb1 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗1.0.xlsx') wb2 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗2.0.xlsx') ws1 = wb1.active ws2 = wb2.active # Get first row of data from each file print("清洗1.0 - First 3 users (columns 1-7):") for row in range(2, 5): cols = [] for col in range(1, 8): cols.append(ws1.cell(row, col).value) print(f" Row {row}: {cols}") print("\n清洗2.0 - First 3 users (columns 1-7):") for row in range(2, 5): cols = [] for col in range(1, 8): cols.append(ws2.cell(row, col).value) print(f" Row {row}: {cols}") # Check if same users exist print("\n清洗1.0 中的家庭角色值:") roles_1 = set() for row in range(2, ws1.max_row + 1): val = ws1.cell(row, 1).value if val: roles_1.add(str(val).strip()) print(f"Unique values: {len(roles_1)}") print("\n清洗2.0 中的家庭角色值:") roles_2 = set() for row in range(2, ws2.max_row + 1): val = ws2.cell(row, 1).value if val: roles_2.add(str(val).strip()) print(f"Unique values: {len(roles_2)}") print(f"\nOverlap analysis:") print(f"Matching roles: {len(roles_1 & roles_2)}") print(f"Unique to 1.0: {len(roles_1 - roles_2)}") print(f"Unique to 2.0: {len(roles_2 - roles_1)}") # Check column mapping - create a unique key per row from columns 1-7 def make_key(ws, row): key_parts = [] for col in range(1, 8): val = ws.cell(row, col).value key_parts.append(str(val) if val is not None else "") return "|".join(key_parts) print("\nChecking row overlap by first 7 columns:") keys_1 = set() for row in range(2, ws1.max_row + 1): keys_1.add(make_key(ws1, row)) keys_2 = set() for row in range(2, ws2.max_row + 1): keys_2.add(make_key(ws2, row)) overlap = len(keys_1 & keys_2) print(f"Matching rows: {overlap}") print(f"Total rows 1.0: {len(keys_1)}") print(f"Total rows 2.0: {len(keys_2)}")