Update README and project cleanup
This commit is contained in:
70
scripts/analyze-excel.py
Normal file
70
scripts/analyze-excel.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
import openpyxl
|
||||
|
||||
# Load both files
|
||||
wb1 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗1.0.xlsx')
|
||||
wb2 = openpyxl.load_workbook('/Users/inkling/Desktop/dmp/清洗2.0.xlsx')
|
||||
|
||||
ws1 = wb1.active
|
||||
ws2 = wb2.active
|
||||
|
||||
# Get first row of data from each file
|
||||
print("清洗1.0 - First 3 users (columns 1-7):")
|
||||
for row in range(2, 5):
|
||||
cols = []
|
||||
for col in range(1, 8):
|
||||
cols.append(ws1.cell(row, col).value)
|
||||
print(f" Row {row}: {cols}")
|
||||
|
||||
print("\n清洗2.0 - First 3 users (columns 1-7):")
|
||||
for row in range(2, 5):
|
||||
cols = []
|
||||
for col in range(1, 8):
|
||||
cols.append(ws2.cell(row, col).value)
|
||||
print(f" Row {row}: {cols}")
|
||||
|
||||
# Check if same users exist
|
||||
print("\n清洗1.0 中的家庭角色值:")
|
||||
roles_1 = set()
|
||||
for row in range(2, ws1.max_row + 1):
|
||||
val = ws1.cell(row, 1).value
|
||||
if val:
|
||||
roles_1.add(str(val).strip())
|
||||
|
||||
print(f"Unique values: {len(roles_1)}")
|
||||
|
||||
print("\n清洗2.0 中的家庭角色值:")
|
||||
roles_2 = set()
|
||||
for row in range(2, ws2.max_row + 1):
|
||||
val = ws2.cell(row, 1).value
|
||||
if val:
|
||||
roles_2.add(str(val).strip())
|
||||
|
||||
print(f"Unique values: {len(roles_2)}")
|
||||
|
||||
print(f"\nOverlap analysis:")
|
||||
print(f"Matching roles: {len(roles_1 & roles_2)}")
|
||||
print(f"Unique to 1.0: {len(roles_1 - roles_2)}")
|
||||
print(f"Unique to 2.0: {len(roles_2 - roles_1)}")
|
||||
|
||||
# Check column mapping - create a unique key per row from columns 1-7
|
||||
def make_key(ws, row):
|
||||
key_parts = []
|
||||
for col in range(1, 8):
|
||||
val = ws.cell(row, col).value
|
||||
key_parts.append(str(val) if val is not None else "")
|
||||
return "|".join(key_parts)
|
||||
|
||||
print("\nChecking row overlap by first 7 columns:")
|
||||
keys_1 = set()
|
||||
for row in range(2, ws1.max_row + 1):
|
||||
keys_1.add(make_key(ws1, row))
|
||||
|
||||
keys_2 = set()
|
||||
for row in range(2, ws2.max_row + 1):
|
||||
keys_2.add(make_key(ws2, row))
|
||||
|
||||
overlap = len(keys_1 & keys_2)
|
||||
print(f"Matching rows: {overlap}")
|
||||
print(f"Total rows 1.0: {len(keys_1)}")
|
||||
print(f"Total rows 2.0: {len(keys_2)}")
|
||||
Reference in New Issue
Block a user