Data Visualization 📊
Data visualization is the process of representing complex data in easy-to-understand graphics. Matplotlib์ ํ์ด์ฌ์์ ๊ฐ์ฅ ๋๋ฆฌ ์ฌ์ฉ๋๋ ์๊ฐํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ์ ๋๋ค.
What is Matplotlib?โ
Matplotlib is a library for drawing 2D graphs and charts in Python. MATLAB๊ณผ ์ ์ฌํ ์ธํฐํ์ด์ค๋ฅผ ์ ๊ณตํฉ๋๋ค.
Key Featuresโ
- ๋ค์ํ ์ฐจํธ: ์ , ๋ง๋, ์ฐ์ ๋, ํ์คํ ๊ทธ๋จ ๋ฑ
- ์ปค์คํฐ๋ง์ด์ง: ์ธ๋ฐํ ์คํ์ผ ์กฐ์ ๊ฐ๋ฅ
- Pandas ํตํฉ: DataFrame๊ณผ ์ฝ๊ฒ ์ฐ๋
- ์ถํ ํ์ง: ๋ ผ๋ฌธ/๋ณด๊ณ ์์ฉ ๊ทธ๋ํ ์์ฑ
Installation
pip install matplotlib
import platform

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Choose a font family with Hangul glyphs for the current operating system:
# AppleGothic on macOS, Malgun Gothic on Windows, NanumGothic otherwise
# (assumes NanumGothic is installed on Linux -- TODO confirm on the target machine).
_KOREAN_FONTS = {'Darwin': 'AppleGothic', 'Windows': 'Malgun Gothic'}
plt.rcParams['font.family'] = _KOREAN_FONTS.get(platform.system(), 'NanumGothic')

# Keep the minus sign from rendering as a broken glyph with these fonts.
plt.rcParams['axes.unicode_minus'] = False

# pyplot has no __version__ attribute; the version lives on the top-level package.
print(f"Matplotlib 버전: {matplotlib.__version__}")
Basic Graphs
Line Plot
import matplotlib.pyplot as plt
import numpy as np

# Prepare the data: 100 evenly spaced samples of sin(x) on [0, 10]
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Draw a single line
plt.plot(x, y)
plt.title('사인 함수')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.grid(True)
plt.show()

# Draw several lines on one figure; `label` feeds the legend
plt.figure(figsize=(10, 6))
plt.plot(x, np.sin(x), label='sin(x)')
plt.plot(x, np.cos(x), label='cos(x)')
plt.title('삼각함수')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
Style Options
x = np.linspace(0, 10, 20)
y = x ** 2

# Line styles via the compact format string
plt.plot(x, y, 'r--', linewidth=2)   # red dashed line
plt.plot(x, y, 'bo-', markersize=8)  # blue circles joined by a solid line
plt.plot(x, y, 'g^:', alpha=0.5)     # green triangles with a dotted line (semi-transparent)

# The same kind of styling spelled out explicitly with keyword arguments
plt.plot(
    x, y,
    color='blue',              # line color
    linestyle='--',            # line style
    linewidth=2,               # line width
    marker='o',                # marker shape
    markersize=8,              # marker size
    markerfacecolor='red',     # marker fill color
    markeredgecolor='black',   # marker edge color
    alpha=0.7,                 # transparency
    label='데이터',            # legend entry
)
Bar Chart
Vertical Bar Chart
import matplotlib.pyplot as plt

# Data
categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]

# Vertical bar chart with a light horizontal grid
plt.figure(figsize=(10, 6))
plt.bar(categories, values, color='skyblue', edgecolor='black')
plt.title('제품별 판매량')
plt.xlabel('제품')
plt.ylabel('판매량')
plt.grid(axis='y', alpha=0.3)
plt.show()

# Give every bar its own color
colors = ['red', 'green', 'blue', 'orange', 'purple']
plt.bar(categories, values, color=colors)
plt.show()
Horizontal Bar Chart
# Horizontal bars: categories go on the y axis, values on the x axis.
# `categories` and `values` are the lists defined in the vertical bar example.
plt.figure(figsize=(10, 6))
plt.barh(categories, values, color='lightcoral')
plt.title('제품별 판매량')
plt.xlabel('판매량')
plt.ylabel('제품')
plt.show()
Grouped Bar Chart
import numpy as np

# Data
categories = ['A', 'B', 'C', 'D']
values1 = [23, 45, 56, 78]
values2 = [34, 55, 48, 70]

x = np.arange(len(categories))  # one integer slot per category
width = 0.35                    # width of each bar within a slot

# Grouped bars: shift each series half a bar width left/right of the slot center
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width/2, values1, width, label='2023년', color='skyblue')
ax.bar(x + width/2, values2, width, label='2024년', color='lightcoral')

ax.set_xlabel('제품')
ax.set_ylabel('판매량')
ax.set_title('연도별 제품 판매 비교')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()
Stacked Bar Chart
# Stacked bars
categories = ['Q1', 'Q2', 'Q3', 'Q4']
product_a = [20, 35, 30, 35]
product_b = [25, 32, 34, 20]
product_c = [15, 20, 25, 30]

plt.figure(figsize=(10, 6))
plt.bar(categories, product_a, label='제품 A')
# `bottom` is the height at which each bar starts, so B sits on top of A ...
plt.bar(categories, product_b, bottom=product_a, label='제품 B')
# ... and C sits on top of A + B (element-wise sum needs arrays, not lists).
plt.bar(categories, product_c, bottom=np.array(product_a) + np.array(product_b), label='제품 C')

plt.xlabel('분기')
plt.ylabel('판매량')
plt.title('분기별 제품 판매')
plt.legend()
plt.show()
Scatter Plot
import matplotlib.pyplot as plt
import numpy as np

# Data: fixed seed so the random points are reproducible
np.random.seed(42)
x = np.random.rand(100) * 100
y = np.random.rand(100) * 100
sizes = np.random.rand(100) * 100
colors = np.random.rand(100)

# Basic scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(x, y)
plt.title('기본 산점도')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True, alpha=0.3)
plt.show()

# Encode two more variables as marker size (`s`) and color (`c` through a colormap)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.6)
plt.colorbar(scatter, label='값')
plt.title('크기와 색상을 가진 산점도')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True, alpha=0.3)
plt.show()
Correlation Visualization
# Correlation between height and weight (synthetic data)
height = np.random.normal(170, 10, 100)
weight = height * 0.8 + np.random.normal(0, 5, 100)

plt.figure(figsize=(10, 6))
plt.scatter(height, weight, alpha=0.5)
plt.title('키와 몸무게 상관관계')
plt.xlabel('키 (cm)')
plt.ylabel('몸무게 (kg)')

# Add a trend line: degree-1 least-squares fit evaluated at the sample heights
z = np.polyfit(height, weight, 1)
p = np.poly1d(z)
plt.plot(height, p(height), "r--", alpha=0.8, label='추세선')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
Histogram
import matplotlib.pyplot as plt
import numpy as np

# Normally distributed sample (mean 100, standard deviation 15)
data = np.random.normal(100, 15, 1000)

# Histogram with a vertical marker at the sample mean
plt.figure(figsize=(10, 6))
plt.hist(data, bins=30, color='skyblue', edgecolor='black', alpha=0.7)
plt.title('시험 점수 분포')
plt.xlabel('점수')
plt.ylabel('빈도')
plt.axvline(data.mean(), color='red', linestyle='--', linewidth=2,
            label=f'평균: {data.mean():.1f}')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()

# Compare two histograms; alpha < 1 keeps the overlap visible
data1 = np.random.normal(100, 15, 1000)
data2 = np.random.normal(110, 10, 1000)

plt.figure(figsize=(10, 6))
plt.hist(data1, bins=30, alpha=0.5, label='A반', color='blue')
plt.hist(data2, bins=30, alpha=0.5, label='B반', color='red')
plt.title('반별 시험 점수 분포')
plt.xlabel('점수')
plt.ylabel('빈도')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()
Pie Chart
# Data
labels = ['Python', 'Java', 'JavaScript', 'C++', 'Others']
sizes = [35, 25, 20, 10, 10]
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#ff99cc']
explode = (0.1, 0, 0, 0, 0)  # pull the first wedge (Python) out to emphasize it

# Pie chart
plt.figure(figsize=(10, 8))
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90)
plt.title('프로그래밍 언어 사용 비율')
plt.axis('equal')
plt.show()

# Donut chart
plt.figure(figsize=(10, 8))
plt.pie(sizes, labels=labels, colors=colors,
        autopct='%1.1f%%', startangle=90,
        wedgeprops={'width': 0.5})  # a wedge width below the radius leaves a hole
plt.title('프로그래밍 언어 사용 비율 (도넛)')
plt.axis('equal')
plt.show()
Box Plot
import numpy as np

# Data: three synthetic score distributions, seeded for reproducibility
np.random.seed(42)
data1 = np.random.normal(100, 10, 200)
data2 = np.random.normal(90, 15, 200)
data3 = np.random.normal(110, 5, 200)

# Box plot
# NOTE: Matplotlib 3.9 renamed `labels` to `tick_labels`; `labels` still works
# there but emits a deprecation warning. Switch once older versions are dropped.
plt.figure(figsize=(10, 6))
plt.boxplot([data1, data2, data3],
            labels=['A반', 'B반', 'C반'],
            patch_artist=True,  # draw boxes as patches so facecolor applies
            boxprops=dict(facecolor='lightblue', alpha=0.5),
            medianprops=dict(color='red', linewidth=2))
plt.title('반별 시험 점수 분포')
plt.ylabel('점수')
plt.grid(axis='y', alpha=0.3)
plt.show()
Subplots
Basic Subplots
# 2x2 grid of subplots; `axes` is indexed as axes[row, col]
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Top-left: line graph
x = np.linspace(0, 10, 100)
axes[0, 0].plot(x, np.sin(x))
axes[0, 0].set_title('선 그래프')
axes[0, 0].grid(True)

# Top-right: bar chart
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 78]
axes[0, 1].bar(categories, values, color='skyblue')
axes[0, 1].set_title('막대 그래프')

# Bottom-left: scatter plot
x = np.random.rand(50)
y = np.random.rand(50)
axes[1, 0].scatter(x, y, alpha=0.5)
axes[1, 0].set_title('산점도')

# Bottom-right: histogram
data = np.random.normal(0, 1, 1000)
axes[1, 1].hist(data, bins=30, color='lightcoral')
axes[1, 1].set_title('히스토그램')

plt.tight_layout()
plt.show()
Various Layouts
# Unevenly sized subplots on a single figure
fig = plt.figure(figsize=(12, 8))
# The top row uses cells 1 and 2 of a 2x2 grid
ax1 = plt.subplot(2, 2, 1)
ax1.plot([1, 2, 3], [1, 4, 9])
ax1.set_title('Plot 1')
ax2 = plt.subplot(2, 2, 2)
ax2.bar(['A', 'B', 'C'], [10, 20, 15])
ax2.set_title('Plot 2')
ax3 = plt.subplot(2, 1, 2) # cell 2 of a 2x1 grid: the whole bottom row
ax3.plot(np.random.rand(100))
ax3.set_title('Plot 3 (Wide)')
plt.tight_layout()
plt.show()
Styling
Basic Styles
# List the style sheets that are available
print(plt.style.available)

# Apply a style sheet
plt.style.use('seaborn-v0_8-darkgrid')
# or 'ggplot', 'fivethirtyeight', 'bmh', etc.

x = np.linspace(0, 10, 100)
y = np.sin(x)

plt.figure(figsize=(10, 6))
plt.plot(x, y, linewidth=2)
plt.title('스타일이 적용된 그래프')
plt.show()

# Reset to the default style
plt.style.use('default')
Color Selection
# Different ways to specify a color
plt.plot(x, y, color='red')                  # name
plt.plot(x, y, color='r')                    # one-letter abbreviation
plt.plot(x, y, color='#FF5733')              # hex code
plt.plot(x, y, color=(0.1, 0.2, 0.5))        # RGB tuple
plt.plot(x, y, color=(0.1, 0.2, 0.5, 0.7))   # RGBA (includes transparency)

# Using a colormap: sample 10 evenly spaced colors from viridis
colors = plt.cm.viridis(np.linspace(0, 1, 10))
for i in range(10):
    # offset each curve by i so the ten lines do not overlap
    plt.plot(x, y + i, color=colors[i])
Fonts and Text
plt.figure(figsize=(10, 6))

# Title
plt.title('제목', fontsize=20, fontweight='bold', color='navy')

# Axis labels
plt.xlabel('X축', fontsize=14, style='italic')
plt.ylabel('Y축', fontsize=14, style='italic')

# Add text at data coordinates (5, 0.5) inside a rounded box
plt.text(5, 0.5, '주석 텍스트', fontsize=12,
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Annotation with an arrow: `xy` is the point marked, `xytext` is where the label sits
plt.annotate('중요 지점', xy=(7, 0.7), xytext=(8, 0.9),
             arrowprops=dict(arrowstyle='->', color='red', lw=2),
             fontsize=12)

# x and y are not defined in this snippet; presumably reused from an earlier example
plt.plot(x, y)
plt.show()
Practical Examples
예제 1: 월별 매출 대시보드
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Build the data (monthly sales, in millions of won)
months = ['1월', '2월', '3월', '4월', '5월', '6월']
sales_2023 = [120, 135, 148, 162, 155, 178]
sales_2024 = [145, 158, 170, 185, 192, 205]
target = [150] * 6

# Yearly totals are reused by the bar chart and the text summary
total_2023 = sum(sales_2023)
total_2024 = sum(sales_2024)

# Create the dashboard: six panels on a 2x3 grid
fig = plt.figure(figsize=(16, 10))
fig.suptitle('2024년 상반기 매출 분석 대시보드', fontsize=20, fontweight='bold')

# 1. Monthly sales comparison (line graph)
ax1 = plt.subplot(2, 3, 1)
ax1.plot(months, sales_2023, marker='o', linewidth=2, label='2023년', color='skyblue')
ax1.plot(months, sales_2024, marker='s', linewidth=2, label='2024년', color='coral')
ax1.plot(months, target, linestyle='--', linewidth=2, label='목표', color='green')
ax1.set_title('월별 매출 추이', fontsize=14, fontweight='bold')
ax1.set_ylabel('매출 (백만원)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Total sales per year (bar chart)
ax2 = plt.subplot(2, 3, 2)
total_sales = [total_2023, total_2024]
colors = ['skyblue', 'coral']
bars = ax2.bar(['2023년', '2024년'], total_sales, color=colors, edgecolor='black', linewidth=2)
ax2.set_title('연도별 총 매출', fontsize=14, fontweight='bold')
ax2.set_ylabel('매출 (백만원)')
for bar in bars:
    # write each bar's value just above its top edge, centered horizontally
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height,
             f'{int(height)}M', ha='center', va='bottom', fontsize=12, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)

# 3. Year-over-year growth rate (bar chart); green for growth, red otherwise
ax3 = plt.subplot(2, 3, 3)
growth = [(s24 - s23) / s23 * 100 for s23, s24 in zip(sales_2023, sales_2024)]
colors_growth = ['green' if g > 0 else 'red' for g in growth]
ax3.bar(months, growth, color=colors_growth, alpha=0.7, edgecolor='black')
ax3.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax3.set_title('전년 대비 증가율', fontsize=14, fontweight='bold')
ax3.set_ylabel('증가율 (%)')
ax3.grid(axis='y', alpha=0.3)

# 4. Target achievement rate (horizontal bars); green once the target is met
ax4 = plt.subplot(2, 3, 4)
achievement = [s / t * 100 for s, t in zip(sales_2024, target)]
colors_achievement = ['green' if a >= 100 else 'orange' for a in achievement]
ax4.barh(months, achievement, color=colors_achievement, alpha=0.7, edgecolor='black')
ax4.axvline(x=100, color='red', linestyle='--', linewidth=2, label='목표선')
ax4.set_title('월별 목표 달성률', fontsize=14, fontweight='bold')
ax4.set_xlabel('달성률 (%)')
ax4.legend()
ax4.grid(axis='x', alpha=0.3)

# 5. Sales distribution (histogram) over both years combined
ax5 = plt.subplot(2, 3, 5)
all_sales = sales_2023 + sales_2024
ax5.hist(all_sales, bins=10, color='mediumpurple', edgecolor='black', alpha=0.7)
ax5.axvline(np.mean(all_sales), color='red', linestyle='--',
            linewidth=2, label=f'평균: {np.mean(all_sales):.1f}M')
ax5.set_title('매출 분포', fontsize=14, fontweight='bold')
ax5.set_xlabel('매출 (백만원)')
ax5.set_ylabel('빈도')
ax5.legend()
ax5.grid(axis='y', alpha=0.3)

# 6. Statistics summary (text panel with the axes hidden)
ax6 = plt.subplot(2, 3, 6)
ax6.axis('off')
# Count months against the `target` list rather than a repeated literal, and let
# the format spec emit the sign so a decline would not print as "+-...".
months_on_target = sum(s >= t for s, t in zip(sales_2024, target))
stats_text = f"""
📊 주요 지표 요약
2024년 총 매출: {total_2024:,}백만원
2023년 총 매출: {total_2023:,}백만원
증가: {total_2024 - total_2023:+,}백만원
증가율: {(total_2024 - total_2023) / total_2023 * 100:+.1f}%
월평균 매출: {np.mean(sales_2024):.1f}백만원
최고 매출: {max(sales_2024)}백만원 ({months[sales_2024.index(max(sales_2024))]})
최저 매출: {min(sales_2024)}백만원 ({months[sales_2024.index(min(sales_2024))]})
목표 달성 월수: {months_on_target}/{len(months)}개월
평균 달성률: {np.mean(achievement):.1f}%
"""
ax6.text(0.1, 0.5, stats_text, fontsize=12, verticalalignment='center',
         fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

plt.tight_layout()
plt.show()
예제 2: 제품 판매 분석
import matplotlib.pyplot as plt
import pandas as pd

# Product sales data
data = {
    '제품': ['노트북', '마우스', '키보드', '모니터', '헤드셋'],
    '판매량': [450, 1200, 800, 350, 600],
    '매출': [450000, 36000, 80000, 350000, 180000],
    '재고': [120, 450, 280, 90, 200],
}
df = pd.DataFrame(data)

# Dashboard: four panels on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('제품 판매 현황', fontsize=18, fontweight='bold')

# 1. Units sold per product
axes[0, 0].bar(df['제품'], df['판매량'], color='steelblue', edgecolor='black')
axes[0, 0].set_title('제품별 판매량', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('판매량')
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(axis='y', alpha=0.3)

# 2. Revenue per product, with the value written at the end of each bar
colors = plt.cm.Set3(range(len(df)))
axes[0, 1].barh(df['제품'], df['매출'], color=colors, edgecolor='black')
axes[0, 1].set_title('제품별 매출', fontsize=14, fontweight='bold')
axes[0, 1].set_xlabel('매출 (원)')
for i, v in enumerate(df['매출']):
    axes[0, 1].text(v, i, f' {v:,}', va='center', fontsize=10)
axes[0, 1].grid(axis='x', alpha=0.3)

# 3. Revenue share (pie chart)
explode = [0.05] * len(df)
# Emphasize the top-revenue product. Use the positional argmax: idxmax() returns
# an index *label*, which only matches a list position for a default RangeIndex.
explode[int(df['매출'].to_numpy().argmax())] = 0.15
axes[1, 0].pie(df['매출'], labels=df['제품'], autopct='%1.1f%%',
               explode=explode, shadow=True, startangle=90)
axes[1, 0].set_title('매출 비율', fontsize=14, fontweight='bold')

# 4. Units sold vs. stock (scatter); marker area scales with revenue
axes[1, 1].scatter(df['판매량'], df['재고'], s=df['매출']/500,
                   c=range(len(df)), cmap='viridis', alpha=0.6,
                   edgecolors='black', linewidth=2)
# Label every point with its product name, offset 5 points up and to the right
for txt, sold, stock in zip(df['제품'], df['판매량'], df['재고']):
    axes[1, 1].annotate(txt, (sold, stock),
                        xytext=(5, 5), textcoords='offset points', fontsize=10)
axes[1, 1].set_title('판매량 vs 재고 (크기=매출)', fontsize=14, fontweight='bold')
axes[1, 1].set_xlabel('판매량')
axes[1, 1].set_ylabel('재고')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
예제 3: 시계열 데이터 시각화
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Generate synthetic daily visitor data: base level + linear trend
# + 7-day sinusoidal pattern + Gaussian noise (seeded for reproducibility)
dates = pd.date_range('2024-01-01', '2024-06-30', freq='D')
np.random.seed(42)
base_visitors = 1000
trend = np.linspace(0, 500, len(dates))
seasonality = 200 * np.sin(2 * np.pi * np.arange(len(dates)) / 7)  # weekly pattern
noise = np.random.normal(0, 50, len(dates))
visitors = base_visitors + trend + seasonality + noise

df = pd.DataFrame({
    'date': dates,
    'visitors': visitors.astype(int),
})
df['month'] = df['date'].dt.month
df['weekday'] = df['date'].dt.day_name()

# Visualization: six panels on a 3x2 grid
fig = plt.figure(figsize=(16, 12))
fig.suptitle('웹사이트 방문자 분석 (2024년 상반기)', fontsize=20, fontweight='bold')

# 1. Overall trend
ax1 = plt.subplot(3, 2, 1)
ax1.plot(df['date'], df['visitors'], linewidth=1, alpha=0.7, label='일일 방문자')
# Moving averages (the first window-1 values are NaN and are simply not drawn)
ma7 = df['visitors'].rolling(window=7).mean()
ma30 = df['visitors'].rolling(window=30).mean()
ax1.plot(df['date'], ma7, linewidth=2, label='7일 이동평균', color='orange')
ax1.plot(df['date'], ma30, linewidth=2, label='30일 이동평균', color='red')
ax1.set_title('일일 방문자 추이', fontsize=14, fontweight='bold')
ax1.set_ylabel('방문자 수')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Monthly average, with the value written above each bar
ax2 = plt.subplot(3, 2, 2)
monthly_avg = df.groupby('month')['visitors'].mean()
month_names = ['1월', '2월', '3월', '4월', '5월', '6월']
bars = ax2.bar(month_names, monthly_avg, color='skyblue', edgecolor='black')
ax2.set_title('월별 평균 방문자', fontsize=14, fontweight='bold')
ax2.set_ylabel('평균 방문자 수')
for bar in bars:
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height,
             f'{int(height)}', ha='center', va='bottom', fontsize=10)
ax2.grid(axis='y', alpha=0.3)

# 3. Pattern by day of week; reindex puts the groups in Monday-to-Sunday order
ax3 = plt.subplot(3, 2, 3)
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_labels = ['월', '화', '수', '목', '금', '토', '일']
weekday_avg = df.groupby('weekday')['visitors'].mean().reindex(weekday_order)
# weekends get a different color from weekdays
colors = ['#FF6B6B' if day in ['Saturday', 'Sunday'] else '#4ECDC4' for day in weekday_order]
ax3.bar(weekday_labels, weekday_avg, color=colors, edgecolor='black')
ax3.set_title('요일별 평균 방문자', fontsize=14, fontweight='bold')
ax3.set_ylabel('평균 방문자 수')
ax3.grid(axis='y', alpha=0.3)

# 4. Visitor distribution with mean and median markers
ax4 = plt.subplot(3, 2, 4)
ax4.hist(df['visitors'], bins=30, color='mediumpurple', edgecolor='black', alpha=0.7)
ax4.axvline(df['visitors'].mean(), color='red', linestyle='--',
            linewidth=2, label=f"평균: {df['visitors'].mean():.0f}")
ax4.axvline(df['visitors'].median(), color='green', linestyle='--',
            linewidth=2, label=f"중앙값: {df['visitors'].median():.0f}")
ax4.set_title('방문자 분포', fontsize=14, fontweight='bold')
ax4.set_xlabel('방문자 수')
ax4.set_ylabel('일수')
ax4.legend()
ax4.grid(axis='y', alpha=0.3)

# 5. Box plot per month
# NOTE: Matplotlib 3.9 renamed `labels` to `tick_labels`; `labels` still works
# there but emits a deprecation warning.
ax5 = plt.subplot(3, 2, 5)
monthly_data = [df[df['month'] == m]['visitors'].values for m in range(1, 7)]
bp = ax5.boxplot(monthly_data, labels=month_names, patch_artist=True)
for patch in bp['boxes']:
    patch.set_facecolor('lightblue')
ax5.set_title('월별 방문자 분포', fontsize=14, fontweight='bold')
ax5.set_ylabel('방문자 수')
ax5.grid(axis='y', alpha=0.3)

# 6. Statistics summary (text panel with the axes hidden)
ax6 = plt.subplot(3, 2, 6)
ax6.axis('off')
# argmax()/argmin() give positions within weekday_avg, which is in the same
# Monday-to-Sunday order as weekday_labels. The last line compares the mean of
# the final 30 days with the mean of the first 30 days.
stats = f"""
📊 방문자 통계 요약
총 분석 기간: {len(df)}일
총 방문자: {df['visitors'].sum():,}명
평균 방문자: {df['visitors'].mean():.0f}명
중앙값: {df['visitors'].median():.0f}명
표준편차: {df['visitors'].std():.0f}
최대 방문: {df['visitors'].max()}명
({df[df['visitors'] == df['visitors'].max()]['date'].dt.date.values[0]})
최소 방문: {df['visitors'].min()}명
({df[df['visitors'] == df['visitors'].min()]['date'].dt.date.values[0]})
가장 많은 요일: {weekday_labels[weekday_avg.argmax()]}요일
가장 적은 요일: {weekday_labels[weekday_avg.argmin()]}요일
증가 추세: +{(df['visitors'].iloc[-30:].mean() - df['visitors'].iloc[:30].mean()):.0f}명
"""
ax6.text(0.1, 0.5, stats, fontsize=11, verticalalignment='center',
         fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5))

plt.tight_layout()
plt.show()
Saving and Exporting
import matplotlib.pyplot as plt

# Create a graph
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
ax.set_title('샘플 그래프')

# Save as an image; the output format follows the file extension
plt.savefig('graph.png', dpi=300, bbox_inches='tight')  # PNG
plt.savefig('graph.pdf', bbox_inches='tight')           # PDF
plt.savefig('graph.svg', bbox_inches='tight')           # SVG

# Transparent background (writes graph.png again, replacing the file saved above)
plt.savefig('graph.png', dpi=300, bbox_inches='tight', transparent=True)

plt.close()  # free the figure's memory
Frequently Asked Questions
한글이 깨져요!
import platform

import matplotlib.pyplot as plt

# Set exactly one Hangul-capable font, chosen by operating system. Assigning all
# three families one after another would leave only the last one in effect.
system = platform.system()
if system == 'Darwin':       # macOS
    plt.rcParams['font.family'] = 'AppleGothic'
elif system == 'Windows':    # Windows
    plt.rcParams['font.family'] = 'Malgun Gothic'
else:                        # Linux (assumes NanumGothic is installed -- TODO confirm)
    plt.rcParams['font.family'] = 'NanumGothic'

# Keep the minus sign from rendering as a broken glyph with these fonts.
plt.rcParams['axes.unicode_minus'] = False
그래프가 겹쳐요!
# Use tight_layout
plt.tight_layout()
# Or adjust the margins and the spacing between subplots manually
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1,
wspace=0.3, hspace=0.3)