71 lines
2.0 KiB
Python
71 lines
2.0 KiB
Python
#!/usr/bin/env python3
|
|
import openpyxl
|
|
|
|
# Load both files
|
|
wb1 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗1.0.xlsx')
|
|
wb2 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗2.0.xlsx')
|
|
|
|
ws1 = wb1.active
|
|
ws2 = wb2.active
|
|
|
|
# Get first row of data from each file
|
|
print("清洗1.0 - First 3 users (columns 1-7):")
|
|
for row in range(2, 5):
|
|
cols = []
|
|
for col in range(1, 8):
|
|
cols.append(ws1.cell(row, col).value)
|
|
print(f" Row {row}: {cols}")
|
|
|
|
print("\n清洗2.0 - First 3 users (columns 1-7):")
|
|
for row in range(2, 5):
|
|
cols = []
|
|
for col in range(1, 8):
|
|
cols.append(ws2.cell(row, col).value)
|
|
print(f" Row {row}: {cols}")
|
|
|
|
# Check if same users exist
|
|
print("\n清洗1.0 中的家庭角色值:")
|
|
roles_1 = set()
|
|
for row in range(2, ws1.max_row + 1):
|
|
val = ws1.cell(row, 1).value
|
|
if val:
|
|
roles_1.add(str(val).strip())
|
|
|
|
print(f"Unique values: {len(roles_1)}")
|
|
|
|
print("\n清洗2.0 中的家庭角色值:")
|
|
roles_2 = set()
|
|
for row in range(2, ws2.max_row + 1):
|
|
val = ws2.cell(row, 1).value
|
|
if val:
|
|
roles_2.add(str(val).strip())
|
|
|
|
print(f"Unique values: {len(roles_2)}")
|
|
|
|
print(f"\nOverlap analysis:")
|
|
print(f"Matching roles: {len(roles_1 & roles_2)}")
|
|
print(f"Unique to 1.0: {len(roles_1 - roles_2)}")
|
|
print(f"Unique to 2.0: {len(roles_2 - roles_1)}")
|
|
|
|
# Check column mapping - create a unique key per row from columns 1-7
|
|
def make_key(ws, row):
|
|
key_parts = []
|
|
for col in range(1, 8):
|
|
val = ws.cell(row, col).value
|
|
key_parts.append(str(val) if val is not None else "")
|
|
return "|".join(key_parts)
|
|
|
|
print("\nChecking row overlap by first 7 columns:")
|
|
keys_1 = set()
|
|
for row in range(2, ws1.max_row + 1):
|
|
keys_1.add(make_key(ws1, row))
|
|
|
|
keys_2 = set()
|
|
for row in range(2, ws2.max_row + 1):
|
|
keys_2.add(make_key(ws2, row))
|
|
|
|
overlap = len(keys_1 & keys_2)
|
|
print(f"Matching rows: {overlap}")
|
|
print(f"Total rows 1.0: {len(keys_1)}")
|
|
print(f"Total rows 2.0: {len(keys_2)}")
|