Files
onion-dmp/scripts/analyze-excel.py
2026-04-08 14:52:09 +08:00

71 lines
2.0 KiB
Python

#!/usr/bin/env python3
import openpyxl
# Load both files
wb1 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗1.0.xlsx')
wb2 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗2.0.xlsx')
ws1 = wb1.active
ws2 = wb2.active
# Get first row of data from each file
print("清洗1.0 - First 3 users (columns 1-7):")
for row in range(2, 5):
cols = []
for col in range(1, 8):
cols.append(ws1.cell(row, col).value)
print(f" Row {row}: {cols}")
print("\n清洗2.0 - First 3 users (columns 1-7):")
for row in range(2, 5):
cols = []
for col in range(1, 8):
cols.append(ws2.cell(row, col).value)
print(f" Row {row}: {cols}")
# Check if same users exist
print("\n清洗1.0 中的家庭角色值:")
roles_1 = set()
for row in range(2, ws1.max_row + 1):
val = ws1.cell(row, 1).value
if val:
roles_1.add(str(val).strip())
print(f"Unique values: {len(roles_1)}")
print("\n清洗2.0 中的家庭角色值:")
roles_2 = set()
for row in range(2, ws2.max_row + 1):
val = ws2.cell(row, 1).value
if val:
roles_2.add(str(val).strip())
print(f"Unique values: {len(roles_2)}")
print(f"\nOverlap analysis:")
print(f"Matching roles: {len(roles_1 & roles_2)}")
print(f"Unique to 1.0: {len(roles_1 - roles_2)}")
print(f"Unique to 2.0: {len(roles_2 - roles_1)}")
# Check column mapping - create a unique key per row from columns 1-7
def make_key(ws, row):
key_parts = []
for col in range(1, 8):
val = ws.cell(row, col).value
key_parts.append(str(val) if val is not None else "")
return "|".join(key_parts)
print("\nChecking row overlap by first 7 columns:")
keys_1 = set()
for row in range(2, ws1.max_row + 1):
keys_1.add(make_key(ws1, row))
keys_2 = set()
for row in range(2, ws2.max_row + 1):
keys_2.add(make_key(ws2, row))
overlap = len(keys_1 & keys_2)
print(f"Matching rows: {overlap}")
print(f"Total rows 1.0: {len(keys_1)}")
print(f"Total rows 2.0: {len(keys_2)}")