NumPy Basics
NumPy (Numerical Python) is the core library for scientific computing in Python. It provides powerful multidimensional array objects and fast computation capabilities.
What is NumPy?
NumPy supports large multidimensional arrays and matrix operations, and includes a library of mathematical functions.
Key Features
- Fast Performance: Implemented in C, 10-100x faster than pure Python
- Memory Efficiency: Uses contiguous memory blocks
- Broadcasting: Operations between arrays of different sizes
- Vectorization: Operations on entire arrays without loops
Installation
pip install numpy
import numpy as np
# Check the installed NumPy version
print(np.__version__) # 1.24.3
Creating ndarrays
Basic Creation Methods
import numpy as np
# Create an array from a Python list
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1) # [1 2 3 4 5]
print(type(arr1)) # <class 'numpy.ndarray'>
# 2-D array (built from a list of lists)
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2)
# [[1 2 3]
# [4 5 6]]
# 3-D array
arr3 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(arr3.shape) # (2, 2, 2)
Creating Special Arrays
# Array filled with zeros
zeros = np.zeros((3, 4))
print(zeros)
# [[0. 0. 0. 0.]
# [0. 0. 0. 0.]
# [0. 0. 0. 0.]]
# Array filled with ones
ones = np.ones((2, 3))
print(ones)
# [[1. 1. 1.]
# [1. 1. 1.]]
# Array filled with a specific value
full = np.full((2, 2), 7)
print(full)
# [[7 7]
# [7 7]]
# Identity matrix
identity = np.eye(3)
print(identity)
# [[1. 0. 0.]
# [0. 1. 0.]
# [0. 0. 1.]]
# Range array (start, stop, step)
range_arr = np.arange(0, 10, 2)
print(range_arr) # [0 2 4 6 8]
# Evenly spaced array (5 points from 0 to 1, endpoints included)
linspace = np.linspace(0, 1, 5)
print(linspace) # [0. 0.25 0.5 0.75 1. ]
# Random arrays
random_arr = np.random.rand(3, 3) # uniform distribution between 0 and 1
print(random_arr)
random_int = np.random.randint(1, 100, size=(3, 3)) # random integers
print(random_int)
Array Properties
# Inspect the basic attributes of a 2x4 integer array
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(arr.shape) # (2, 4) - shape of the array
print(arr.ndim) # 2 - number of dimensions
print(arr.size) # 8 - total number of elements
print(arr.dtype) # int64 - data type
print(arr.itemsize) # 8 - size of each element in bytes
Indexing and Slicing
1D Arrays
# Indexing and slicing a 1-D array
arr = np.array([10, 20, 30, 40, 50])
print(arr[0]) # 10 - first element
print(arr[-1]) # 50 - last element
print(arr[1:4]) # [20 30 40] - slicing
print(arr[::2]) # [10 30 50] - every second element
print(arr[::-1]) # [50 40 30 20 10] - reversed order
2D Arrays
# Indexing and slicing a 2-D array with [row, column] notation
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr[0, 0]) # 1 - first row, first column
print(arr[1, 2]) # 6 - second row, third column
print(arr[0]) # [1 2 3] - entire first row
print(arr[:, 1]) # [2 5 8] - entire second column
print(arr[0:2, 1:3]) # [[2 3] [5 6]] - sub-array
Boolean Indexing
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Select only the elements that satisfy a condition
print(arr[arr > 5]) # [6 7 8 9 10]
print(arr[arr % 2 == 0]) # [2 4 6 8 10]
# Multiple conditions (combine the masks with &, each wrapped in parentheses)
print(arr[(arr > 3) & (arr < 8)]) # [4 5 6 7]
Array Operations
Basic Arithmetic Operations
# Element-wise arithmetic between two arrays of the same shape
arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([10, 20, 30, 40])
print(arr1 + arr2) # [11 22 33 44]
print(arr1 - arr2) # [-9 -18 -27 -36]
print(arr1 * arr2) # [10 40 90 160]
print(arr1 / arr2) # [0.1 0.1 0.1 0.1]
print(arr1 ** 2) # [1 4 9 16]
# Scalar operations (the scalar is applied to every element)
print(arr1 + 10) # [11 12 13 14]
print(arr1 * 2) # [2 4 6 8]
Mathematical Functions
# Element-wise mathematical functions
arr = np.array([1, 4, 9, 16])
print(np.sqrt(arr)) # [1. 2. 3. 4.]
print(np.exp(arr)) # exponential function
print(np.log(arr)) # natural logarithm
print(np.sin(arr)) # trigonometric function
# Maximum / minimum
arr = np.array([3, 1, 4, 1, 5, 9, 2, 6])
print(np.max(arr)) # 9
print(np.min(arr)) # 1
print(np.argmax(arr)) # 5 - index of the maximum value
print(np.argmin(arr)) # 1 - index of the (first) minimum value
Broadcasting
Broadcasting automatically handles operations between arrays of different sizes.
# 1-D array + scalar
arr = np.array([1, 2, 3])
print(arr + 5) # [6 7 8]
# 2-D array + 1-D array (the 1-D array is added to every row)
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
arr1d = np.array([10, 20, 30])
print(arr2d + arr1d)
# [[11 22 33]
# [14 25 36]]
# 2-D array + column vector (the column is added to every column)
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
col_vec = np.array([[10], [20]])
print(arr2d + col_vec)
# [[11 12 13]
# [24 25 26]]
Aggregation Functions
# Aggregations over the whole array
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(np.sum(arr)) # 21 - total sum
print(np.mean(arr)) # 3.5 - mean
print(np.std(arr)) # 1.707... - standard deviation
print(np.var(arr)) # 2.916... - variance
print(np.median(arr)) # 3.5 - median
# Specifying an axis
print(np.sum(arr, axis=0)) # [5 7 9] - sum of each column
print(np.sum(arr, axis=1)) # [6 15] - sum of each row
print(np.mean(arr, axis=0)) # [2.5 3.5 4.5] - mean of each column
print(np.mean(arr, axis=1)) # [2. 5.] - mean of each row
Reshaping Arrays
arr = np.array([1, 2, 3, 4, 5, 6])
# reshape - change the shape
reshaped = arr.reshape(2, 3)
print(reshaped)
# [[1 2 3]
# [4 5 6]]
# flatten - collapse to one dimension
flattened = reshaped.flatten()
print(flattened) # [1 2 3 4 5 6]
# transpose - swap rows and columns
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2d.T)
# [[1 4]
# [2 5]
# [3 6]]
# Add a dimension
arr = np.array([1, 2, 3])
expanded = np.expand_dims(arr, axis=0)
print(expanded.shape) # (1, 3)
Combining Arrays
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
# Vertical stacking
vstack = np.vstack([arr1, arr2])
print(vstack)
# [[1 2]
# [3 4]
# [5 6]
# [7 8]]
# Horizontal stacking
hstack = np.hstack([arr1, arr2])
print(hstack)
# [[1 2 5 6]
# [3 4 7 8]]
# concatenate
concat = np.concatenate([arr1, arr2], axis=0) # along the rows (axis 0)
print(concat)
Practical Examples
Example 1: Student Grade Statistics
import numpy as np
# Scores of 5 students in 3 subjects (rows: students, columns: subjects)
scores = np.array([
[85, 90, 78], # student 1
[92, 88, 95], # student 2
[78, 85, 80], # student 3
[95, 92, 88], # student 4
[88, 86, 92] # student 5
])
# Average score of each student (mean across each row)
student_avg = np.mean(scores, axis=1)
print("ํ์๋ณ ํ๊ท :", student_avg)
# [84.33 91.67 81. 91.67 88.67]
# Average score of each subject (mean down each column)
subject_avg = np.mean(scores, axis=0)
print("๊ณผ๋ชฉ๋ณ ํ๊ท :", subject_avg)
# [87.6 88.2 86.6]
# Overall average
total_avg = np.mean(scores)
print(f"์ ์ฒด ํ๊ท : {total_avg:.2f}") # 87.47
# Highest score and where it occurs; argmax returns a flat index, so
# unravel_index converts it back to a (row, column) pair
max_score = np.max(scores)
max_position = np.unravel_index(np.argmax(scores), scores.shape)
print(f"์ต๊ณ ์ ์: {max_score} (ํ์ {max_position[0]+1}, ๊ณผ๋ชฉ {max_position[1]+1})")
# Number of scores of 90 or above (summing a boolean mask counts the True values)
high_scores = np.sum(scores >= 90)
print(f"90์ ์ด์: {high_scores}ํ") # 6 times
Example 2: Matrix Operations and Linear Algebra
import numpy as np
# Matrix multiplication
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# Matrix product (dot product)
C = np.dot(A, B)
# or equivalently: C = A @ B
print("ํ๋ ฌ ๊ณฑ:")
print(C)
# [[19 22]
# [43 50]]
# Inverse matrix
inv_A = np.linalg.inv(A)
print("์ญํ๋ ฌ:")
print(inv_A)
# [[-2. 1. ]
# [ 1.5 -0.5]]
# Verify the inverse: A times its inverse should give the identity matrix
identity = A @ inv_A
print("A ร A^(-1):")
print(np.round(identity, 2)) # identity matrix
# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print("๊ณ ์ ๊ฐ:", eigenvalues)
print("๊ณ ์ ๋ฒกํฐ:")
print(eigenvectors)
# Determinant
det = np.linalg.det(A)
print(f"ํ๋ ฌ์: {det}") # approximately -2.0 (floating-point rounding may print e.g. -2.0000000000000004)
Example 3: Calculating Moving Averages
import numpy as np
# Stock price data
prices = np.array([100, 102, 98, 105, 107, 103, 110, 108, 112, 115])
def moving_average(data, window_size):
    """Compute the simple moving average of a 1-D sequence.

    Args:
        data: 1-D array-like of numeric values.
        window_size: Number of consecutive samples averaged per output
            value; must be a positive integer.

    Returns:
        A float ndarray of length ``len(data) - window_size + 1``, or an
        empty array when the window is longer than the data.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    values = np.asarray(data)
    # np.convolve swaps its operands when the kernel is longer than the
    # signal, which would return values that are not window averages.
    if window_size > values.size:
        return np.empty(0, dtype=float)
    # Equal weights summing to 1 turn the convolution into a plain mean.
    weights = np.ones(window_size) / window_size
    # 'valid' keeps only positions where the window fully overlaps the data.
    return np.convolve(values, weights, mode='valid')
# 3-day moving average
ma_3 = moving_average(prices, 3)
print("3์ผ ์ด๋ ํ๊ท :", np.round(ma_3, 2))
# 5-day moving average
ma_5 = moving_average(prices, 5)
print("5์ผ ์ด๋ ํ๊ท :", np.round(ma_5, 2))
# Daily returns in percent: change from the previous day divided by the previous day's price
returns = (prices[1:] - prices[:-1]) / prices[:-1] * 100
print("์ผ์ผ ์์ต๋ฅ (%):", np.round(returns, 2))
Example 4: Basic Image Processing
import numpy as np
# Simple image (8x8 grayscale) with random pixel values in 0..255
image = np.random.randint(0, 256, size=(8, 8))
print("์๋ณธ ์ด๋ฏธ์ง:")
print(image)
# Invert the image
inverted = 255 - image
print("\n๋ฐ์ ์ด๋ฏธ์ง:")
print(inverted)
# Adjust brightness (+50); clip keeps the result within 0..255
brightened = np.clip(image + 50, 0, 255)
print("\n๋ฐ๊ฒ:")
print(brightened)
# Increase contrast (x1.5); clip before casting so values fit in uint8
contrasted = np.clip(image * 1.5, 0, 255).astype(np.uint8)
print("\n๋๋น ์ฆ๊ฐ:")
print(contrasted)
# Binarize (threshold 128): pixels above the threshold become 255, others 0
binary = np.where(image > 128, 255, 0)
print("\n์ด์งํ:")
print(binary)
Performance Comparison
import numpy as np
import time
# Pure Python vs NumPy: time doubling one million integers with each approach
size = 1000000
# Python list
python_list = list(range(size))
# perf_counter is a high-resolution clock meant for measuring short
# durations; time.time() can be too coarse to register the NumPy step,
# leaving a zero elapsed time and a ZeroDivisionError in the ratio below.
start = time.perf_counter()
result = [x * 2 for x in python_list]
python_time = time.perf_counter() - start
# NumPy array
numpy_array = np.arange(size)
start = time.perf_counter()
result = numpy_array * 2
numpy_time = time.perf_counter() - start
print(f"Python ๋ฆฌ์คํธ: {python_time:.4f}์ด")
print(f"NumPy ๋ฐฐ์ด: {numpy_time:.4f}์ด")
print(f"NumPy๊ฐ {python_time/numpy_time:.1f}๋ฐฐ ๋น ๋ฆ")
Frequently Asked Questions
What's the difference between NumPy arrays and Python lists?
NumPy Arrays:
- Store only data of the same type
- Memory efficient
- Fast operations (implemented in C)
- Support vectorized operations
Python Lists:
- Can mix different types
- Easy dynamic resizing
- Slow operations (pure Python)
What's the difference between axis=0 and axis=1?
arr = np.array([[1, 2, 3], [4, 5, 6]])
# axis=0: along the rows, top to bottom (↓) - one result per column
print(np.sum(arr, axis=0)) # [5 7 9]
# axis=1: along the columns, left to right (→) - one result per row
print(np.sum(arr, axis=1)) # [6 15]
What does -1 mean in reshape?
-1 means to automatically calculate the size.
arr = np.arange(12)
# The column count 4 is computed automatically
reshaped = arr.reshape(3, -1) # (3, 4)
# The row count 3 is computed automatically
reshaped = arr.reshape(-1, 4) # (3, 4)
# Flatten to one dimension
flattened = arr.reshape(-1) # (12,)
Copy vs. View?
arr = np.array([1, 2, 3, 4, 5])
# View (modifying it affects the original)
view = arr[1:4]
view[0] = 999
print(arr) # [1 999 3 4 5]
# Copy (the original is safe)
copy = arr[1:4].copy()
copy[0] = 777
print(arr) # [1 999 3 4 5] (unchanged)
Next Steps
Once you've mastered NumPy, learn:
- Pandas: NumPy-based data analysis library
- SciPy: Advanced scientific computing features
- Matplotlib: NumPy array visualization
- Machine Learning: Start ML with scikit-learn