파일 자동화

Python을 사용하면 반복적인 파일 작업을 자동화할 수 있습니다. 파일과 폴더를 생성, 삭제, 이동, 복사하는 작업을 코드로 처리해보겠습니다.

핵심 모듈

os 모듈

운영체제와 상호작용하는 기본 모듈입니다.

import os

# Show the current working directory
current_dir = os.getcwd()
print(f"현재 디렉토리: {current_dir}")

# Change the working directory (example path)
os.chdir('/Users/username/Documents')

# List the entries of the current directory
files = os.listdir('.')
print(files)

# Join path components using the separator of the running OS
path = os.path.join('folder', 'subfolder', 'file.txt')
print(path)  # folder/subfolder/file.txt (macOS/Linux)

shutil 모듈

고수준 파일 작업을 위한 모듈입니다.

import shutil

# Copy a file
shutil.copy('source.txt', 'destination.txt')

# Copy a file together with its metadata
shutil.copy2('source.txt', 'destination.txt')

# Copy an entire folder tree
shutil.copytree('source_folder', 'destination_folder')

# Move (or rename) a file or folder
shutil.move('old_location.txt', 'new_location.txt')

# Delete a folder including everything inside it
shutil.rmtree('folder_to_delete')

폴더 및 파일 생성

디렉토리 생성

import os

# Create a single directory, but only when nothing exists at that path yet
if not os.path.exists('new_folder'):
    os.mkdir('new_folder')

# Create nested directories in one call
os.makedirs('parent/child/grandchild', exist_ok=True)
# exist_ok=True: no error is raised if the directory already exists

파일 생성

# Create an empty file: opening in 'w' mode and writing nothing
with open('new_file.txt', 'w') as f:
    pass

# Create a file and write content to it (UTF-8 so the Korean text is stored correctly)
with open('data.txt', 'w', encoding='utf-8') as f:
    f.write('Hello, World!\n')
    f.write('Python 자동화')

파일 및 폴더 삭제

import os
import shutil

# Delete a file, only if it exists
if os.path.exists('file_to_delete.txt'):
    os.remove('file_to_delete.txt')

# Delete an empty directory
if os.path.exists('empty_folder'):
    os.rmdir('empty_folder')

# Delete a directory together with its contents
if os.path.exists('folder_with_files'):
    shutil.rmtree('folder_with_files')

파일 검색

os.walk로 하위 폴더 탐색

import os

# Find the .txt files in the start directory and all of its subfolders
for root, dirs, files in os.walk('/path/to/search'):
    for file in files:
        if file.endswith('.txt'):
            # root is the directory currently being visited
            full_path = os.path.join(root, file)
            print(full_path)

glob 패턴 매칭

import glob

# All .py files in the current folder
python_files = glob.glob('*.py')

# .txt files in the current folder and every subfolder
all_txt_files = glob.glob('**/*.txt', recursive=True)

# Files matching a specific name pattern
data_files = glob.glob('data_*.csv')

# The pathlib equivalent of a recursive glob
from pathlib import Path

for file in Path('.').rglob('*.log'):
    print(file)

파일 정보 확인

import os
from datetime import datetime

file_path = 'example.txt'

# Check whether the path exists
exists = os.path.exists(file_path)

# Check whether the path is a file or a directory
is_file = os.path.isfile(file_path)
is_dir = os.path.isdir(file_path)

# File size in bytes (printed in KB)
size = os.path.getsize(file_path)
print(f"크기: {size / 1024:.2f} KB")

# Last modification time
mtime = os.path.getmtime(file_path)
modified_date = datetime.fromtimestamp(mtime)
print(f"마지막 수정: {modified_date}")

# "ctime" of the file.
# NOTE: per the os.path.getctime documentation this is the creation time on
# Windows, but on Unix it is the time of the last metadata change, so the
# label printed below is only accurate on Windows.
ctime = os.path.getctime(file_path)
created_date = datetime.fromtimestamp(ctime)
print(f"생성 시간: {created_date}")

실전 예제

1. 사진 정리 스크립트

날짜별로 사진을 자동으로 분류합니다.

import os
import shutil
from datetime import datetime
from pathlib import Path

def organize_photos(source_dir, dest_dir):
    """Move image files from ``source_dir`` into ``dest_dir/YYYY/MM`` folders.

    The year/month folder is derived from each file's last-modification time.
    When a file with the same name already exists in the target folder, a
    numeric suffix (``_1``, ``_2``, ...) is appended to the moved file's name.

    Args:
        source_dir: Directory that is searched recursively for images.
        dest_dir: Root of the year/month tree; created if it does not exist.
    """
    # Create the destination root up front
    os.makedirs(dest_dir, exist_ok=True)

    # Extensions (compared case-insensitively) that count as images
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.heic'}

    # Snapshot the candidates before moving anything so the traversal is not
    # affected by the moves, and keep regular files only: a directory named
    # like "album.jpg" must not be moved as if it were a photo.
    candidates = [
        entry for entry in Path(source_dir).rglob('*')
        if entry.suffix.lower() in image_extensions and entry.is_file()
    ]

    for file_path in candidates:
        # The modification time decides the year/month folder
        mtime = os.path.getmtime(file_path)
        date = datetime.fromtimestamp(mtime)

        year_month = date.strftime('%Y/%m')
        target_dir = os.path.join(dest_dir, year_month)
        os.makedirs(target_dir, exist_ok=True)

        # A file that already sits in its year/month folder (dest_dir inside
        # source_dir, or a repeated run) would otherwise collide with itself
        # and be renamed with a new "_N" suffix on every run.
        if file_path.parent.resolve() == Path(target_dir).resolve():
            continue

        target_path = os.path.join(target_dir, file_path.name)

        # Resolve name clashes by appending an increasing counter
        name, ext = os.path.splitext(file_path.name)
        counter = 1
        while os.path.exists(target_path):
            target_path = os.path.join(target_dir, f"{name}_{counter}{ext}")
            counter += 1

        shutil.move(str(file_path), target_path)
        print(f"이동: {file_path.name} -> {year_month}/")

# Usage example: sort the images found under Downloads into Photos/<year>/<month>
organize_photos('/Users/username/Downloads', '/Users/username/Photos')

2. 자동 백업 스크립트

중요한 파일을 날짜별로 백업합니다.

import os
import shutil
from datetime import datetime
import zipfile

def backup_files(source_dirs, backup_dir):
    """Archive the given directories into one timestamped ZIP file.

    The archive is written to ``backup_dir`` as ``backup_YYYYmmdd_HHMMSS.zip``.
    Every file keeps its path relative to the parent of its source directory,
    so each source appears under its own top-level folder inside the archive.
    After the archive is written, older archives are pruned through
    ``cleanup_old_backups``.

    Args:
        source_dirs: Iterable of directory paths to back up.
        backup_dir: Directory that receives the archive; created if missing.
    """
    # Create the backup folder
    os.makedirs(backup_dir, exist_ok=True)

    # Archive name carries the current date and time
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"backup_{timestamp}.zip"
    backup_path = os.path.join(backup_dir, backup_name)
    backup_abs = os.path.abspath(backup_path)

    with zipfile.ZipFile(backup_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for source_dir in source_dirs:
            # abspath normalises the path first: with a trailing separator
            # ("/data/docs/") a plain dirname() would return the directory
            # itself and the top-level folder name would be lost in the ZIP.
            archive_base = os.path.dirname(os.path.abspath(source_dir))

            for root, dirs, files in os.walk(source_dir):
                for file in files:
                    file_path = os.path.join(root, file)

                    # Never add the archive that is currently being written
                    # (happens when backup_dir lies inside a source dir).
                    if os.path.abspath(file_path) == backup_abs:
                        continue

                    # Path of the file inside the ZIP
                    arcname = os.path.relpath(file_path, archive_base)
                    zipf.write(file_path, arcname)
                    print(f"백업: {file_path}")

    # Report the size of the finished archive
    size_mb = os.path.getsize(backup_path) / (1024 * 1024)
    print(f"\n백업 완료: {backup_name} ({size_mb:.2f} MB)")

    # Prune old archives so that at most 30 are kept
    cleanup_old_backups(backup_dir, keep=30)

def cleanup_old_backups(backup_dir, keep=30):
    """Delete all but the ``keep`` most recently modified backup archives.

    Only entries of ``backup_dir`` named ``backup_*.zip`` are considered.

    Args:
        backup_dir: Directory containing the backup archives.
        keep: Number of newest archives to retain.
    """
    # (mtime, path) pairs for every archive that matches the naming scheme
    dated_archives = [
        (os.path.getmtime(os.path.join(backup_dir, entry)),
         os.path.join(backup_dir, entry))
        for entry in os.listdir(backup_dir)
        if entry.startswith('backup_') and entry.endswith('.zip')
    ]

    # Newest first, so everything past the first `keep` entries is stale
    dated_archives.sort(reverse=True)

    for _, stale_path in dated_archives[keep:]:
        os.remove(stale_path)
        print(f"삭제: {os.path.basename(stale_path)}")

# Usage example: archive two directories into /Users/username/Backups
backup_files(
    source_dirs=[
        '/Users/username/Documents',
        '/Users/username/Projects'
    ],
    backup_dir='/Users/username/Backups'
)

3. 로그 파일 정리

오래된 로그 파일을 자동으로 압축하거나 삭제합니다.

import os
import gzip
import shutil
from datetime import datetime, timedelta

def cleanup_logs(log_dir, days_to_keep=7, compress_days=3):
    """Delete old ``.log`` files and gzip the moderately old ones.

    The age of a file is the number of whole days since its last
    modification. Files older than ``days_to_keep`` days are deleted. Files
    older than ``compress_days`` days are compressed to ``<name>.log.gz`` and
    the uncompressed original is removed. Only entries directly inside
    ``log_dir`` whose name ends in ``.log`` are processed.

    Args:
        log_dir: Directory that contains the log files.
        days_to_keep: Files older than this many days are deleted.
        compress_days: Files older than this many days are compressed.
    """
    now = datetime.now()

    for file in os.listdir(log_dir):
        if not file.endswith('.log'):
            continue

        file_path = os.path.join(log_dir, file)

        # Age in whole days, based on the modification time
        mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
        age = (now - mtime).days

        # Too old: delete
        if age > days_to_keep:
            os.remove(file_path)
            print(f"삭제: {file} (생성 후 {age}일)")

        # In between: compress
        elif age > compress_days:
            # Skip files for which a compressed copy already exists
            gz_path = file_path + '.gz'
            if os.path.exists(gz_path):
                continue

            # The size must be read while the original still exists
            original_size = os.path.getsize(file_path)

            with open(file_path, 'rb') as f_in, gzip.open(gz_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

            compressed_size = os.path.getsize(gz_path)

            # Remove the original only after the archive is fully written
            os.remove(file_path)

            # An empty log has nothing to shrink; avoid dividing by zero
            if original_size:
                ratio = (1 - compressed_size / original_size) * 100
            else:
                ratio = 0.0

            print(f"압축: {file} (용량 {ratio:.1f}% 감소)")

# Usage example: delete logs older than 7 days, compress those older than 3 days
cleanup_logs('/var/log/myapp', days_to_keep=7, compress_days=3)

4. 파일명 일괄 변경

여러 파일의 이름을 규칙에 따라 변경합니다.

import os
import re

def batch_rename(directory, pattern, replacement):
    """Rename the entries of ``directory`` by regex substitution.

    ``re.sub(pattern, replacement, name)`` is applied to every entry name.
    Entries whose name does not change are left alone, and a rename is
    skipped when the resulting name already exists in the directory.

    Args:
        directory: Directory whose entries are renamed.
        pattern: Regular expression to search for in each name.
        replacement: Replacement text (may use group references).
    """
    renamed_count = 0

    for filename in os.listdir(directory):
        new_name = re.sub(pattern, replacement, filename)

        # Nothing matched, so there is nothing to rename
        if new_name == filename:
            continue

        target = os.path.join(directory, new_name)

        # Never rename onto an existing entry
        if os.path.exists(target):
            print(f"건너뜀: {filename} (이미 존재함)")
            continue

        os.rename(os.path.join(directory, filename), target)
        print(f"{filename} -> {new_name}")
        renamed_count += 1

    print(f"\n총 {renamed_count}개 파일명 변경 완료")

def add_prefix(directory, prefix):
    """Prepend ``prefix`` to the name of every non-directory entry.

    Subdirectories are left untouched. A rename is skipped (and reported)
    when the prefixed name already exists, because ``os.rename`` would
    otherwise silently replace that file on POSIX systems.

    Args:
        directory: Directory whose files are renamed.
        prefix: Text to put in front of each file name.
    """
    # Sorted so the result does not depend on the OS listing order
    for filename in sorted(os.listdir(directory)):
        old_path = os.path.join(directory, filename)

        # Directories are excluded
        if os.path.isdir(old_path):
            continue

        new_name = prefix + filename
        new_path = os.path.join(directory, new_name)

        # Never overwrite an existing entry
        if os.path.exists(new_path):
            print(f"건너뜀: {filename} (이미 존재함)")
            continue

        os.rename(old_path, new_path)
        print(f"{filename} -> {new_name}")

def sequential_numbering(directory, prefix='file', start=1):
    """Rename the files of ``directory`` to ``<prefix>_<NNN><ext>``.

    Files are numbered in sorted name order beginning at ``start``; each file
    keeps its extension. Subdirectories are ignored.

    The rename happens in two passes (original name -> unique temporary name
    -> final name). Renaming directly would overwrite files that already
    carry one of the target names but have not been processed yet.

    Args:
        directory: Directory whose files are renumbered.
        prefix: Leading part of every new name.
        start: Number assigned to the first file.

    Raises:
        FileExistsError: A target name is already taken by an entry that is
            not one of the files being renamed (for example a directory).
            Nothing is renamed in that case.
    """
    import uuid  # local import: only needed for the temporary names

    files = sorted(
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    )

    # Work out every (old name, new name) pair before touching the disk
    plan = []
    for i, filename in enumerate(files, start=start):
        _, ext = os.path.splitext(filename)
        plan.append((filename, f"{prefix}_{i:03d}{ext}"))

    # Targets occupied by something outside the renamed set cannot be freed
    renamed_set = set(files)
    conflicts = [
        new_name for _, new_name in plan
        if new_name not in renamed_set
        and os.path.lexists(os.path.join(directory, new_name))
    ]
    if conflicts:
        raise FileExistsError(f"대상 이름이 이미 존재함: {', '.join(conflicts)}")

    # Pass 1: move every file out of the way under a unique temporary name
    token = uuid.uuid4().hex
    staged = []
    for index, (filename, new_name) in enumerate(plan):
        temp_name = f".renaming-{token}-{index}"
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, temp_name),
        )
        staged.append((filename, temp_name, new_name))

    # Pass 2: all target names are free now
    for filename, temp_name, new_name in staged:
        os.rename(
            os.path.join(directory, temp_name),
            os.path.join(directory, new_name),
        )
        print(f"{filename} -> {new_name}")

# Usage examples
# Replace whitespace runs with an underscore
batch_rename('/path/to/files', r'\s+', '_')

# Change the date format (2023-01-01 -> 20230101)
batch_rename('/path/to/files', r'(\d{4})-(\d{2})-(\d{2})', r'\1\2\3')

# Add a prefix
add_prefix('/path/to/files', 'backup_')

# Assign sequential numbers
sequential_numbering('/path/to/photos', prefix='photo', start=1)

5. 중복 파일 찾기

해시값으로 중복 파일을 찾습니다.

import os
import hashlib
from collections import defaultdict

def file_hash(file_path, chunk_size=8192):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in pieces of ``chunk_size`` bytes, so it is never loaded
    into memory as a whole.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read.

    Returns:
        The MD5 digest as a hex string.
    """
    digest = hashlib.md5()

    with open(file_path, 'rb') as stream:
        # iter() with a sentinel keeps reading until read() returns b''
        for block in iter(lambda: stream.read(chunk_size), b''):
            digest.update(block)

    return digest.hexdigest()

def find_duplicates(directory):
    """Report files under ``directory`` that have identical content.

    Every non-empty file found by a recursive walk is hashed with
    ``file_hash``; files that share a digest are printed as one group,
    followed by the number of redundant copies and the space they occupy.
    Files that cannot be read are reported and skipped.

    Args:
        directory: Root directory of the search.
    """
    # Group (path, size) pairs by content digest
    hash_to_files = defaultdict(list)

    print("파일 스캔 중...")

    for root, dirs, files in os.walk(directory):
        for filename in files:
            file_path = os.path.join(root, filename)

            try:
                file_size = os.path.getsize(file_path)

                # Empty files are excluded
                if file_size == 0:
                    continue

                file_hash_value = file_hash(file_path)
            except OSError as e:  # PermissionError is a subclass of OSError
                print(f"오류: {file_path} - {e}")
                continue

            hash_to_files[file_hash_value].append((file_path, file_size))

    print("\n중복 파일:")
    total_duplicates = 0
    total_wasted_space = 0

    # The loop variable must not be called `file_hash`: assigning that name
    # anywhere in this function makes it local, and the file_hash() call
    # above would then fail with UnboundLocalError.
    for digest, group in hash_to_files.items():
        if len(group) > 1:
            print(f"\n그룹 (해시: {digest[:8]}...):")

            for file_path, file_size in group:
                size_mb = file_size / (1024 * 1024)
                print(f"  - {file_path} ({size_mb:.2f} MB)")

            # Every file except the first one counts as a redundant copy
            total_duplicates += len(group) - 1
            total_wasted_space += group[0][1] * (len(group) - 1)

    if total_duplicates > 0:
        wasted_mb = total_wasted_space / (1024 * 1024)
        print(f"\n총 {total_duplicates}개의 중복 파일")
        print(f"낭비되는 공간: {wasted_mb:.2f} MB")
    else:
        print("\n중복 파일 없음")

# Usage example: scan the Documents folder for duplicate files
find_duplicates('/Users/username/Documents')

pathlib 사용하기

객체 지향적인 경로 처리 방식입니다.

from pathlib import Path

# Build a path object
path = Path('/Users/username/Documents')

# Check whether the path exists
if path.exists():
    print("존재함")

# Check whether it is a directory
if path.is_dir():
    print("디렉토리임")

# List the entries of the directory
for file in path.iterdir():
    print(file.name)

# Glob pattern (this directory only)
for txt_file in path.glob('*.txt'):
    print(txt_file)

# Recursive search
for py_file in path.rglob('*.py'):
    print(py_file)

# Join path components with the / operator
new_path = path / 'subfolder' / 'file.txt'

# Write and read a text file
text_file = Path('example.txt')
text_file.write_text('Hello, World!', encoding='utf-8')
content = text_file.read_text(encoding='utf-8')

# File information
print(f"크기: {text_file.stat().st_size} bytes")
print(f"확장자: {text_file.suffix}")
print(f"파일명: {text_file.stem}")
print(f"부모 디렉토리: {text_file.parent}")

자주 묻는 질문

Q1. os와 pathlib 중 어떤 것을 사용해야 하나요?

A: 새 프로젝트에는 pathlib을 권장합니다. 더 직관적이고 객체 지향적이며, 운영체제 간 호환성이 좋습니다.

# pathlib style (recommended): build <home>/Documents/file.txt
from pathlib import Path
path = Path.home() / 'Documents' / 'file.txt'

# os style: the same path built with os.path
import os
path = os.path.join(os.path.expanduser('~'), 'Documents', 'file.txt')

Q2. 파일을 삭제할 때 휴지통으로 보낼 수 있나요?

A: send2trash 라이브러리를 사용하면 됩니다.

from send2trash import send2trash

# Move the file to the trash instead of deleting it permanently
send2trash('file_to_delete.txt')

Q3. 큰 파일을 복사할 때 진행률을 보고 싶어요.

A: tqdm 라이브러리와 함께 사용할 수 있습니다.

from tqdm import tqdm
import shutil

def copy_with_progress(src, dst):
    """Copy ``src`` to ``dst`` in 8 KiB chunks while showing a tqdm progress bar.

    Args:
        src: Path of the file to read.
        dst: Path of the file to write; opened in 'wb' mode.
    """
    # Imported here because the surrounding snippet imports only tqdm and
    # shutil, so `os` would otherwise be undefined when it is run on its own.
    import os

    # Total size in bytes, used as the length of the progress bar
    total_size = os.path.getsize(src)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        with tqdm(total=total_size, unit='B', unit_scale=True) as pbar:
            while chunk := fsrc.read(8192):
                fdst.write(chunk)
                # Advance the bar by the number of bytes just written
                pbar.update(len(chunk))

# Usage example: copy a large file while displaying progress
copy_with_progress('large_file.dat', 'destination.dat')

Q4. 파일 작업 중 에러가 발생하면 어떻게 처리하나요?

A: try-except로 예외 처리를 해야 합니다.

import os

# Handle the specific, expected failures first and fall back to a generic
# handler for everything else.
try:
    os.remove('file.txt')
except FileNotFoundError:
    # The file does not exist
    print("파일이 존재하지 않습니다.")
except PermissionError:
    # No permission to delete the file
    print("파일 삭제 권한이 없습니다.")
except Exception as e:
    # Any other error
    print(f"알 수 없는 오류: {e}")

Q5. 파일 작업이 안전한가요? 실수로 파일을 삭제할 수 있나요?

A: 네, 조심해야 합니다. 다음 안전 수칙을 따르세요:

import os

def safe_delete(file_path):
    """Delete a file after user confirmation, keeping a ``.backup`` copy.

    Steps: verify the path exists, ask the user on stdin, copy the file to
    ``<file_path>.backup`` and only then delete the original. If the backup
    or the deletion fails, the error is reported and the original is kept.

    Args:
        file_path: Path of the file to delete.

    Returns:
        True when the file was deleted, False when it was missing, the user
        declined, or an error occurred.
    """
    # Imported here because the surrounding snippet imports only `os`, so
    # `shutil` would otherwise be undefined when it is run on its own.
    import shutil

    # 1. Make sure the file exists
    if not os.path.exists(file_path):
        print(f"파일이 없음: {file_path}")
        return False

    # 2. Ask the user for confirmation
    response = input(f"정말 삭제하시겠습니까? {file_path} (y/n): ")
    if response.lower() != 'y':
        print("취소됨")
        return False

    backup_path = file_path + '.backup'

    try:
        # 3. Back up first; a failed backup must abort the deletion instead
        #    of escaping as an unhandled exception.
        shutil.copy2(file_path, backup_path)

        # 4. Delete
        os.remove(file_path)
    except OSError as e:
        print(f"삭제 실패: {e}")
        return False

    print(f"삭제 완료: {file_path}")
    print(f"백업 위치: {backup_path}")
    return True

다음 단계

파일 자동화를 배웠다면, 다음 주제로 넘어가세요:

엑셀 자동화: openpyxl로 엑셀 작업 자동화하기
웹 자동화: Selenium으로 브라우저 제어하기
작업 스케줄링: 정기적인 작업 자동 실행하기

파일 작업 자동화는 일상적인 업무를 크게 개선할 수 있는 강력한 도구입니다. 실제 업무에 적용해보세요!

핵심 모듈

os 모듈

shutil 모듈

폴더 및 파일 생성

디렉토리 생성

파일 생성

파일 및 폴더 삭제

파일 검색

os.walk로 하위 폴더 탐색

glob 패턴 매칭

파일 정보 확인

실전 예제

1. 사진 정리 스크립트

2. 자동 백업 스크립트

3. 로그 파일 정리

4. 파일명 일괄 변경

5. 중복 파일 찾기

pathlib 사용하기

자주 묻는 질문

Q1. os와 pathlib 중 어떤 것을 사용해야 하나요?

Q2. 파일을 삭제할 때 휴지통으로 보낼 수 있나요?

Q3. 큰 파일을 복사할 때 진행률을 보고 싶어요.

Q4. 파일 작업 중 에러가 발생하면 어떻게 처리하나요?

Q5. 파일 작업이 안전한가요? 실수로 파일을 삭제할 수 있나요?

다음 단계