In Python, variables are not declared with a type; the interpreter determines each variable's type at runtime from the value assigned to it.
school = "Paul G. Allen School" # creates a variable of type str
number = 446 # int
pi = 3.141 # float
print("department:", school)
print("number:", number)
print("pi:", pi)
print(type(school), type(number), type(pi))
department: Paul G. Allen School number: 446 pi: 3.141 <class 'str'> <class 'int'> <class 'float'>
a = 13
b = 4
print(a + b)
print(a / b) # floating point division
print(a // b) # integer division
print(b**2) # powers are built-in
17 3.25 3 16
Python cares about whitespace! There are no curly braces or end statements; a block is delimited by its indentation.
Instead of &&, ||, !, we use 'and', 'or', and 'not'.
Boolean values are written as 'True' and 'False'
a = 9.1
b = 7
# basic if else syntax: the indented lines under each branch form its body
if a < 10:
    print('hello there!')
else:
    print('general kenobi')
# back at the outer indentation level, so this runs whichever branch was taken
print('this always prints')
hello there! this always prints
# and syntax: both conditions must be true
if a < 10 and b > 5:
    print('and')
# or syntax: at least one condition must be true
if a > 10 or b > 5:
    print('or')
and or
# else, else if syntax ('elif' is Python's spelling of "else if")
if False:
    print('this never prints')
elif a != b:
    print('a != b')
else:
    print('else!')
a != b
# Lists
fruit = ['apple', 'pear', 'tomato', 'avocado']
print(fruit)
print(fruit[0])
print(fruit[-2]) # can use reverse indexing
print(fruit[1:3]) # can index multiple items [inclusive, exclusive)
fruit[3] = 'salmon' # can assign items in list
print(fruit)
['apple', 'pear', 'tomato', 'avocado'] apple tomato ['pear', 'tomato'] ['apple', 'pear', 'tomato', 'salmon']
print(len(fruit))
print('pear' in fruit) # can check for existence in a list ("contains()")
4 True
fruit.append('trout') # lists are mutable
print(fruit)
print(len(fruit))
['apple', 'pear', 'tomato', 'salmon', 'trout'] 5
# tuples
tupperware = (1,2,3)
print(tupperware)
print(tupperware[1])
a, b, c = tupperware # can unpack tuples
_, d, _ = tupperware # ignore the elements you don't care about
print(c)
print(d)
(1, 2, 3) 2 3 2
try:
    tupperware[1] = 4 # cannot modify tuples
# item assignment on a tuple raises TypeError; catch only that so
# unrelated errors (e.g. a NameError from a typo) are not hidden
except TypeError as e:
    print(e)
'tuple' object does not support item assignment
# dicts (maps)
ages = {'Jocelyn': 21, 'Newton': 84}
print(ages)
print(ages['Jocelyn'])
ages['Einstein'] = 76
ages[5] = 'hi'
print(ages)
{'Jocelyn': 21, 'Newton': 84} 21 {'Jocelyn': 21, 'Newton': 84, 'Einstein': 76, 5: 'hi'}
# basic for loops
for i in range(5):
    print(i)
# blank line to separate the output of the two loops
print()
# can loop over any iterable
for f in fruit:
    print(f)
0 1 2 3 4 apple pear tomato salmon trout
# advanced for loops
# enumerate yields (index, element) pairs
for idx, item in enumerate(fruit):
    print(idx, item)
# blank line to separate the output of the two loops
print()
a = [3, 1, 4, 1, 5]
b = [2, 7, 1, 8, 2]
# can join the iterators together
for f, pi, e in zip(fruit, a, b):
    print(f, pi, e)
0 apple 1 pear 2 tomato 3 salmon 4 trout apple 3 2 pear 1 7 tomato 4 1 salmon 1 8 trout 5 2
# while loops
my_str = "Jocelyn"
# keep appending until the string is at least 20 characters long
while len(my_str) < 20:
    my_str += ' 20'
# printed once, after the loop has finished
print(my_str)
Jocelyn 20 20 20 20 20
# this is a function
def square_this(x):
    """Return x multiplied by itself."""
    return x * x
print(square_this(4))
16
class Squaring:
    """Toy class showing a constructor, an instance method and a static method."""

    # this is the constructor
    def __init__(self, a, b=4):
        """Store a and b (b defaults to 4) on the instance."""
        self._a = a
        self._b = b

    # this is a method
    def square(self, x):
        """Return x multiplied by itself."""
        return x * x

    # this is a static method
    @staticmethod
    def mymethod(x):
        """Return x raised to the third power; callable without an instance."""
        return x**3
sq = Squaring(3)
print('method:', sq.square(3))
print('static method:', Squaring.mymethod(5))
print('a:', sq._a)
print('b:', sq._b)
method: 9 static method: 125 a: 3 b: 4
NumPy is a powerful scientific computing library for Python. It uses C/C++ in the background to speed up computation.
import numpy as np
a = np.array([1,2,3,4,5])
b = np.array([5,4,3,2,1])
c = a + b # can do elementwise operations
print(c)
print(a*b)
print(a[3:5]) # can use same indexing as lists
print(type(a))
[6 6 6 6 6] [5 8 9 8 5] [4 5] <class 'numpy.ndarray'>
A = np.array([[1,2], [3,4]]) # can create multidimensional arrays
print("A:\n", A)
print(A[0]) # indexing into rows
print("operations:\n", A[:,1]) # indexing into columns
print(A[0,1]) # indexing into rows and columns
print(A[:,1].T) # transposing a column
print(np.shape(A[:,1])) # shape of the matrix
print("matrix transpose:\n", A.T)
print("matrix inverse:\n", np.linalg.pinv(A))
A: [[1 2] [3 4]] [1 2] operations: [2 4] 2 [2 4] (2,) matrix transpose: [[1 3] [2 4]] matrix inverse: [[-2. 1. ] [ 1.5 -0.5]]
# can create special types of arrays
a = np.zeros((4,4))
b = np.ones(3)
c = np.random.random(5)
print(a)
print(b)
print(c)
[[0. 0. 0. 0.] [0. 0. 0. 0.] [0. 0. 0. 0.] [0. 0. 0. 0.]] [1. 1. 1.] [0.90038264 0.86421204 0.8201528 0.2103761 0.71113902]
# matrices
A = np.array([[1,2],[3,4]])
B = np.array([[1, 0],[0, 1]]) # identity matrix (same as np.eye(2))
print(A + B)
print()
print(A @ B)# @ is matrix multiplication
print(A * B)# * is elementwise multiplication. You probably don't mean this
[[2 2] [3 5]] [[1 2] [3 4]] [[1 0] [0 4]]
# numpy functions (see documentation for more)
A = np.array([[1,1],[2,2],[3,3]])
print(A)
print(np.sum(A)) # can sum all elements
print(np.sum(A, axis=0)) # can also sum along a dimension (0)
print(np.sum(A, axis=1)) # can also sum along a dimension (1)
[[1 1] [2 2] [3 3]] 12 [6 6] [2 4 6]
# use numpy to speedup computation
# as a rule, use numpy operations to calculate things, not for loops.
# e.g. call np.dot don't sum over a for loop of the elements.
import time

n = 100000
a = np.ones(n)
b = np.ones(n)
# time.perf_counter() is the high-resolution clock meant for measuring short
# durations; time.time() can be coarse enough to report 0.0 for the
# vectorized add, which would make the speedup division below fail
start = time.perf_counter()
res = a + b
numpy_time = time.perf_counter() - start
print("time:", numpy_time)
# same computation, one element at a time in a Python-level loop
res = []
start = time.perf_counter()
for i in range(n):
    res.append(a[i] + b[i])
loop_time = time.perf_counter() - start
print("time:", loop_time)
print("speedup x:", loop_time / numpy_time)
time: 0.0008039474487304688 time: 0.07655882835388184 speedup x: 95.22864768683274
Matplotlib is a library used to visualize data. A useful technique when generating matplotlib plots in a notebook is the "magic" command `%matplotlib inline`, which makes the generated plots show up automatically in the cell's output; this saves the additional Python code that would otherwise be required to show the plots (e.g. `plt.show()`).
import matplotlib.pyplot as plt
%matplotlib inline
# basic plotting (show scatter and plot)
xs = np.arange(10)
ys = xs ** 2
plt.scatter(xs, ys)
plt.plot(xs, ys)
# ALWAYS label your plots!
plt.title('plot of $x^2$ vs $x$')
plt.xlabel('$x$')
plt.ylabel('$x^2$')
Text(0, 0.5, '$x^2$')
You can also plot multiple items in one graph
xs = np.arange(5)
linear = xs
linear2 = 2 * xs
linear3 = 5 - 3 * xs
quadratic = xs ** 2 - 5
plt.plot(xs, linear, 'o-', label="$f(x)=x$")
plt.plot(xs, linear2, 'o-', label="$f(x)=2x$")
plt.plot(xs, linear3, 'o-', label="$f(x)=5 - 3x$")
plt.plot(xs, quadratic, 'o-', label="$f(x)= x^2 - 5$") # you can format the labels using latex syntax
plt.legend()
plt.title('function plots')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
Text(0, 0.5, '$f(x)$')
Seaborn is another library, built on top of matplotlib, that can be used to visualize data. Seaborn provides a high-level interface for drawing attractive and informative statistical graphs.
Let's plot the same functions as above, but using seaborn instead.
import pandas as pd
d = {'$f(x)=x$': linear, '$f(x)=2x$': linear2, '$f(x)=5 - 3x$': linear3, '$f(x)= x^2 - 5$': quadratic}
df = pd.DataFrame(data=d)
df
 | $f(x)=x$ | $f(x)=2x$ | $f(x)=5 - 3x$ | $f(x)= x^2 - 5$ |
---|---|---|---|---|
0 | 0 | 0 | 5 | -5 |
1 | 1 | 2 | 2 | -4 |
2 | 2 | 4 | -1 | -1 |
3 | 3 | 6 | -4 | 4 |
4 | 4 | 8 | -7 | 11 |
import seaborn as sns
ax = sns.lineplot(data=df)
ax.set(xlabel="x", ylabel="f(x)")
[Text(0, 0.5, 'f(x)'), Text(0.5, 0, 'x')]
IPython notebooks support the use of shell commands directly in a notebook; simply prefix the command with `!` and the IPython kernel will execute it in the system shell rather than in the Python interpreter.
!find . -name "*.ipynb" | wc -l
1