Skip to main content

String Processing

Creating Strings

Various Methods

# Single quotes
name = 'Python'

# Double quotes
message = "Hello, World!"

# Triple quotes (multiline)
text = """์ฒซ ๋ฒˆ์งธ ์ค„
๋‘ ๋ฒˆ์งธ ์ค„
์„ธ ๋ฒˆ์งธ ์ค„"""

poem = '''์žฅ๋ฏธ๋Š” ๋นจ๊ฐ›๊ณ 
์ œ๋น„๊ฝƒ์€ ํŒŒ๋ž—๋‹ค'''

# Escape sequences
quote = "He said, \"Hello!\""
path = "C:\\Users\\Documents"
new_line = "์ฒซ ์ค„\n๋‘ ๋ฒˆ์งธ ์ค„"
tab = "์ด๋ฆ„\t๋‚˜์ด"

# Raw string (ignores escape)
path = r"C:\Users\Documents"

String Indexing and Slicing

Indexing

text = "Python"

# Positive index (from left: 0, 1, 2...)
print(text[0]) # P
print(text[1]) # y
print(text[5]) # n

# Negative index (from right: -1, -2, -3...)
print(text[-1]) # n
print(text[-2]) # o
print(text[-6]) # P

# Error
# print(text[10]) # IndexError

Slicing

text = "Python Programming"

# [start:end] - end not included
print(text[0:6]) # Python
print(text[7:18]) # Programming

# Can be omitted
print(text[:6]) # Python (from beginning)
print(text[7:]) # Programming (to end)
print(text[:]) # Python Programming (entire)

# Negative index
print(text[-11:]) # Programming
print(text[:-12]) # Python

# Specify step [start:end:step]
print(text[::2]) # Pto rgamn (every 2nd char)
print(text[::-1]) # gnimmargorP nohtyP (reversed)

# Practical example
url = "https://www.example.com"
domain = url[8:-4] # www.example
print(domain)

String Operations

Concatenation and Repetition

# Concatenation (+)
first = "Hello"
second = "World"
greeting = first + " " + second
print(greeting) # Hello World

# Repetition (*)
line = "=" * 20
print(line) # ====================

border = "-" * 10
print(f"{border} ์ œ๋ชฉ {border}")
# ---------- ์ œ๋ชฉ ----------

# Join multiple strings
words = ["Python", "is", "awesome"]
sentence = " ".join(words)
print(sentence) # Python is awesome

Comparison

# Equality/Inequality
print("hello" == "hello") # True
print("hello" != "Hello") # True

# Size comparison (lexicographic)
print("apple" < "banana") # True
print("apple" < "Apple") # False (uppercase first)

# Membership test
text = "Python Programming"
print("Python" in text) # True
print("Java" in text) # False
print("Java" not in text) # True

String Methods

Case Conversion

text = "Hello, Python!"

print(text.upper()) # HELLO, PYTHON!
print(text.lower()) # hello, python!
print(text.capitalize()) # Hello, python!
print(text.title()) # Hello, Python!
print(text.swapcase()) # hELLO, pYTHON!

# Practical example - normalize user input
user_input = " YES "
if user_input.strip().lower() == "yes":
print("ํ™•์ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค")

Search and Check

text = "Python Programming"

# Find
print(text.find("Python")) # 0 (first position)
print(text.find("Java")) # -1 (not found)
print(text.index("Programming")) # 7
# print(text.index("Java")) # ValueError

# Count (non-overlapping occurrences)
print(text.count("o")) # 2 (one in "Python", one in "Programming")
print(text.count("m")) # 2 (both in "Programming")

# Start/end check
print(text.startswith("Python")) # True
print(text.endswith("ing")) # True

# Membership check
print("gram" in text) # True

Character Type Check

# Alphabetic only
print("abc".isalpha()) # True
print("abc123".isalpha()) # False

# Digits only
print("123".isdigit()) # True
print("12.3".isdigit()) # False

# Alphanumeric
print("abc123".isalnum()) # True
print("abc 123".isalnum()) # False

# Whitespace only
print(" ".isspace()) # True
print(" a ".isspace()) # False

# Upper/lowercase
print("ABC".isupper()) # True
print("abc".islower()) # True

# Practical example - password validation
password = "Pass123"
has_digit = any(c.isdigit() for c in password)
has_upper = any(c.isupper() for c in password)
has_lower = any(c.islower() for c in password)

if len(password) >= 8 and has_digit and has_upper and has_lower:
print("๊ฐ•ํ•œ ๋น„๋ฐ€๋ฒˆํ˜ธ์ž…๋‹ˆ๋‹ค")

Whitespace Removal

text = "   hello world   "

# With no argument, strip()/lstrip()/rstrip() remove whitespace
print(text.strip()) # "hello world" (both sides)
print(text.lstrip()) # "hello world   " (left)
print(text.rstrip()) # "   hello world" (right)

# Remove specific characters
# NOTE: the argument is a SET of characters, not a prefix/suffix. Every leading
# and trailing character contained in the set is removed.
url = "https://example.com/"
print(url.strip("https://")) # example.com (the trailing "/" is in the set, so it is removed too)
print(url.rstrip("/")) # https://example.com

# Practical example - CSV parsing
data = " ํ™๊ธธ๋™, 25, ์„œ์šธ "
parts = [part.strip() for part in data.split(",")]
print(parts) # ['ํ™๊ธธ๋™', '25', '์„œ์šธ']

Transformation and Replacement

text = "Hello, Python!"

# Replace
print(text.replace("Python", "World")) # Hello, World!
print(text.replace("l", "L")) # HeLLo, Python!
print(text.replace("l", "L", 1)) # HeLlo, Python! (only 1)

# Split
words = text.split(", ")
print(words) # ['Hello', 'Python!']

csv = "ํ™๊ธธ๋™,25,์„œ์šธ"
data = csv.split(",")
print(data) # ['ํ™๊ธธ๋™', '25', '์„œ์šธ']

# Join
words = ["Python", "is", "fun"]
sentence = " ".join(words)
print(sentence) # Python is fun

# Join with specific character
print("-".join(words)) # Python-is-fun

# Split lines
text = """์ฒซ ์ค„
๋‘ ๋ฒˆ์งธ ์ค„
์„ธ ๋ฒˆ์งธ ์ค„"""
lines = text.splitlines()
print(lines) # ['์ฒซ ์ค„', '๋‘ ๋ฒˆ์งธ ์ค„', '์„ธ ๋ฒˆ์งธ ์ค„']

Alignment

# Left align
print("Python".ljust(10)) # "Python "
print("Python".ljust(10, "-")) # "Python----"

# Right align
print("Python".rjust(10)) # " Python"
print("Python".rjust(10, "0")) # "0000Python"

# Center align
print("Python".center(10)) # " Python "
print("Python".center(10, "*"))# "**Python**"

# Practical example - table output
print("์ด๋ฆ„".ljust(10) + "๋‚˜์ด".rjust(5))
print("ํ™๊ธธ๋™".ljust(10) + "25".rjust(5))
print("๊น€์ฒ ์ˆ˜".ljust(10) + "30".rjust(5))

String Formatting

name = "ํ™๊ธธ๋™"
age = 25
height = 175.5

# Basic usage
print(f"์ด๋ฆ„: {name}, ๋‚˜์ด: {age}")

# Expressions
print(f"๋‚ด๋…„ ๋‚˜์ด: {age + 1}")
print(f"ํ‚ค(cm): {height}")

# Format specifiers
price = 1234567
print(f"๊ฐ€๊ฒฉ: {price:,}์›") # ๊ฐ€๊ฒฉ: 1,234,567์›

pi = 3.14159265
print(f"์›์ฃผ์œจ: {pi:.2f}") # ์›์ฃผ์œจ: 3.14

# Alignment and width
print(f"{'Python':>10}") # " Python"
print(f"{'Python':<10}") # "Python "
print(f"{'Python':^10}") # " Python "
print(f"{'Python':*^10}") # "**Python**"

# Number bases
num = 255
print(f"10์ง„์ˆ˜: {num}") # 10์ง„์ˆ˜: 255
print(f"16์ง„์ˆ˜: {num:x}") # 16์ง„์ˆ˜: ff
print(f"8์ง„์ˆ˜: {num:o}") # 8์ง„์ˆ˜: 377
print(f"2์ง„์ˆ˜: {num:b}") # 2์ง„์ˆ˜: 11111111

format() Method

# Position-based
print("์ด๋ฆ„: {}, ๋‚˜์ด: {}".format("ํ™๊ธธ๋™", 25))

# Specify index
print("{1}, {0}".format("World", "Hello")) # Hello, World

# Specify name
print("์ด๋ฆ„: {name}, ๋‚˜์ด: {age}".format(name="ํ™๊ธธ๋™", age=25))

# Specify format
print("๊ฐ€๊ฒฉ: {:,}์›".format(1234567))
print("๋น„์œจ: {:.1%}".format(0.856))

% Formatting (Legacy)

name = "ํ™๊ธธ๋™"
age = 25

print("์ด๋ฆ„: %s, ๋‚˜์ด: %d" % (name, age))
print("๋น„์œจ: %.2f%%" % 85.678)

Practical Examples

Email Validation

def validate_email(email):
    """Return True if *email* has a plausible ``local@domain`` shape.

    This is a deliberately simple structural check, not a full RFC validator.
    The address must contain exactly one "@", a non-empty local part, and a
    domain made of at least two non-empty, dot-separated labels.

    Args:
        email: The address string to check.

    Returns:
        True when every check passes, False otherwise.
    """
    # Exactly one "@" must separate the local part from the domain.
    if email.count("@") != 1:
        return False

    local, _, domain = email.partition("@")
    if not local:
        return False

    # The domain needs a dot, and no label may be empty: this rejects
    # "example", ".com", "example." and "a..b".
    labels = domain.split(".")
    return len(labels) >= 2 and all(labels)

# Test
emails = [
"user@example.com", # โœ…
"invalid.email", # โŒ
"@example.com", # โŒ
"user@", # โŒ
]

for email in emails:
result = "์œ ํšจ" if validate_email(email) else "๋ฌดํšจ"
print(f"{email}: {result}")

Text Masking

def mask_phone(phone):
    """Mask the middle digits of a phone number given as a plain string.

    Args:
        phone: The phone number string; only its length is inspected.

    Returns:
        For an 11-character input, the first 3 and last 4 characters with
        "****" in between; for a 10-character input, the first 3 and last 4
        characters with "***" in between. Any other length is returned
        unchanged.
    """
    if len(phone) == 11:
        # 3 visible + 4 masked + 4 visible
        return phone[:3] + "****" + phone[7:]
    if len(phone) == 10:
        # 3 visible + 3 masked + 4 visible
        return phone[:3] + "***" + phone[6:]
    return phone

def mask_email(email):
    """Mask the local part of an email address, leaving the domain intact.

    Args:
        email: An address containing exactly one "@".

    Returns:
        The address with its local part masked. Local parts of up to two
        characters become their first character (if any) followed by "*";
        longer ones keep the first and last character with "*" in between.

    Raises:
        ValueError: If *email* does not contain exactly one "@".
    """
    local, domain = email.split("@")
    if len(local) <= 2:
        # Slice instead of indexing so an empty local part does not raise
        # IndexError.
        masked_local = local[:1] + "*"
    else:
        masked_local = local[0] + "*" * (len(local) - 2) + local[-1]
    return f"{masked_local}@{domain}"

# Usage
print(mask_phone("01012345678")) # 010****5678
print(mask_email("hong@example.com")) # h**g@example.com

String Analyzer

def analyze_string(text):
    """Count basic character classes in *text*.

    Args:
        text: The string to analyse.

    Returns:
        A dict with seven integer entries, in this order: total length,
        number of whitespace-separated words, uppercase characters, lowercase
        characters, digit characters, whitespace characters, and characters
        that are neither alphanumeric nor whitespace.
    """
    return {
        "๊ธธ์ด": len(text),
        "๋‹จ์–ด์ˆ˜": len(text.split()),
        "๋Œ€๋ฌธ์ž": sum(1 for c in text if c.isupper()),
        "์†Œ๋ฌธ์ž": sum(1 for c in text if c.islower()),
        "์ˆซ์ž": sum(1 for c in text if c.isdigit()),
        "๊ณต๋ฐฑ": sum(1 for c in text if c.isspace()),
        # "Special" means neither alphanumeric nor whitespace.
        "ํŠน์ˆ˜๋ฌธ์ž": sum(1 for c in text if not c.isalnum() and not c.isspace()),
    }

text = "Hello Python 2024! Welcome to coding."
result = analyze_string(text)

print("=== ๋ฌธ์ž์—ด ๋ถ„์„ ===")
for key, value in result.items():
print(f"{key}: {value}")

URL Parser

def parse_url(url):
    """Split a URL into protocol, domain, port and path using string methods.

    Args:
        url: The URL string. The "protocol://" prefix is optional.

    Returns:
        A dict with four string entries, in this order: protocol (defaults to
        "http" when the URL has no "://"), domain, port, and path (always
        starting with "/"). When the URL has no explicit port, the port is
        "80" for protocol "http" and "443" for every other protocol.

    Raises:
        ValueError: If the host part contains more than one ":".
    """
    # Protocol: everything before the first "://", or "http" if absent.
    head, separator, tail = url.partition("://")
    if separator:
        protocol, rest = head, tail
    else:
        protocol, rest = "http", url

    # Path: everything from the first "/" on. With no "/", the remainder is
    # empty and the path falls back to "/".
    domain, _, remainder = rest.partition("/")
    path = "/" + remainder

    # Port: explicit "host:port", otherwise a default based on the protocol.
    if ":" in domain:
        domain, port = domain.split(":")
    else:
        port = "80" if protocol == "http" else "443"

    return {
        "ํ”„๋กœํ† ์ฝœ": protocol,
        "๋„๋ฉ”์ธ": domain,
        "ํฌํŠธ": port,
        "๊ฒฝ๋กœ": path,
    }

url = "https://www.example.com:8080/api/users"
result = parse_url(url)

for key, value in result.items():
print(f"{key}: {value}")

Unicode and Encoding

Working with Unicode

# Korean: len() counts code points, not bytes
text = "안녕하세요"
print(len(text)) # 5

# Emoji: each of these two emoji is a single code point
emoji = "😀🎉"
print(len(emoji)) # 2

# Unicode code points (ord() requires a string of exactly one character)
print(ord("A")) # 65
print(chr(65)) # A
print(ord("가")) # 44032
print(chr(44032)) # 가

Encoding/Decoding

text = "안녕하세요"

# Encoding (string → bytes)
utf8_bytes = text.encode("utf-8")
print(utf8_bytes) # b'\xec\x95\x88\xeb\x85\x95...'

# EUC-KR is a legacy Korean encoding; encoding text it cannot represent
# raises UnicodeEncodeError.
euckr_bytes = text.encode("euc-kr")
print(euckr_bytes)

# Decoding (bytes → string)
decoded = utf8_bytes.decode("utf-8")
print(decoded) # 안녕하세요

Frequently Asked Questions

Q1. Are strings immutable?

A: Yes, strings are immutable.

text = "hello"
# text[0] = "H" # โŒ TypeError

# Create new string
text = "H" + text[1:] # โœ… "Hello"

Q2. + vs join, which is faster?

A: join is faster when concatenating many strings.

# ❌ Slow (each += may build a new string)
result = ""
for i in range(1000):
    result += str(i)

# ✅ Fast (a single join over all the pieces)
result = "".join(str(i) for i in range(1000))

Q3. How to handle indentation in multiline strings?

A: Use the textwrap module

from textwrap import dedent

text = dedent("""
์ฒซ ๋ฒˆ์งธ ์ค„
๋‘ ๋ฒˆ์งธ ์ค„
์„ธ ๋ฒˆ์งธ ์ค„
""").strip()

print(text)
# ์ฒซ ๋ฒˆ์งธ ์ค„
# ๋‘ ๋ฒˆ์งธ ์ค„
# ์„ธ ๋ฒˆ์งธ ์ค„

Q4. Do I need regular expressions?

A: They are necessary for complex pattern matching.

import re

# Simple case: string methods
email = "user@example.com"
if "@" in email and "." in email:
print("์ด๋ฉ”์ผ ํ˜•์‹")

# Complex case: regular expressions
pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
if re.match(pattern, email):
print("์œ ํšจํ•œ ์ด๋ฉ”์ผ")

Next Steps

You've mastered string processing!

Key Takeaways:
✅ Indexing and slicing
✅ Various string methods
✅ String formatting (f-string)
✅ Search, transform, validate
✅ Practical examples

Next Step: Learn collections in Lists and Tuples!