Vectors¶

variationalform https://variationalform.github.io/¶

Just Enough: progress at pace¶

https://variationalform.github.io/

https://github.com/variationalform

Simon Shaw https://www.brunel.ac.uk/people/simon-shaw.

This work is licensed under CC BY-SA 4.0 (Attribution-ShareAlike 4.0 International)

Visit http://creativecommons.org/licenses/by-sa/4.0/ to see the terms.

This document uses python and also makes use of LaTeX in Markdown

What this is about:¶

You will be introduced to ...

  • Vectors as a way to think about points in space.
  • The arithmetic (adding and subtracting) of vectors.
  • Ways to measure the size - or length - of a vector.
  • The numpy library (or package) for working with vectors in python.

We'll then see how to interpret data as vectors in high dimensional space. This will involve abstraction in that although it's easy for us to visualize a point in three dimensions, data may live in many dimensions.

As usual our emphasis will be on doing rather than proving: just enough: progress at pace.

Assigned Reading¶

For this worksheet you should read sections 1.1 - 1.3 and 3.1, 3.2 of [VMLS] for background to the linear algebra of vectors, and also Appendix D of [DSML] if you want to read more about python and numpy.

  • VMLS: Introduction to Applied Linear Algebra - Vectors, Matrices, and Least Squares, by Stephen Boyd and Lieven Vandenberghe, https://web.stanford.edu/~boyd/vmls/
  • DSML: Data Science and Machine Learning, Mathematical and Statistical Methods by Dirk P. Kroese, Zdravko I. Botev, Thomas Taimre, Radislav Vaisman, https://people.smp.uq.edu.au/DirkKroese/DSML and https://people.smp.uq.edu.au/DirkKroese/DSML/DSML.pdf

Further accessible material can be found in [FCLA], and the early part of Chapter 1 of [SVMS]. Advanced material is available in Chapters 2 and 3 of [MML].

  • MML: Mathematics for Machine Learning, by Marc Peter Deisenroth, A. Aldo Faisal, and Cheng Soon Ong. Cambridge University Press. https://mml-book.github.io.
  • FCLA: A First Course in Linear Algebra, by Ken Kuttler, https://math.libretexts.org/Bookshelves/Linear_Algebra/A_First_Course_in_Linear_Algebra_(Kuttler)
  • SVMS: Support Vector Machines Succinctly, by Alexandre Kowalczyk, https://www.syncfusion.com/succinctly-free-ebooks/support-vector-machines-succinctly

All of the above can be accessed legally and without cost.

There are also these useful references for coding:

  • PT: python: https://docs.python.org/3/tutorial
  • NP: numpy: https://numpy.org/doc/stable/user/quickstart.html
  • MPL: matplotlib: https://matplotlib.org

Vectors¶

A vector is a row or column of real numbers enclosed in brackets. For example, these

$$ \boldsymbol{v} = (3, -2, 1),\qquad \boldsymbol{b} = \left(\begin{array}{r} 6 \\ -3 \\ 2.5 \\ -1 \\ 0 \end{array}\right) $$

show a row vector in $\mathbb{R}^3$ and a column vector in $\mathbb{R}^5$, where $\mathbb{R}^n$ denotes the set of $n$-dimensional vectors of real numbers (to get a feel for this, we live in $\mathbb{R}^3$: up/down, forward/backward and left/right). We will denote vectors by lower case bold letters. A vector $\boldsymbol{v}$ in $\mathbb{R}^n$, written as $\boldsymbol{v} \in \mathbb{R}^n$, is said to have dimension $n$.

Note that we use commas to make it clear that the numbers are separate entities, but the commas are not part of the vector. We often think of vectors as having physical meaning, and then we diagrammatically represent them with arrows.

For example, the diagram here https://en.wikipedia.org/wiki/Euclidean_vector#/media/File:Position_vector.svg, taken from https://en.wikipedia.org/wiki/Euclidean_vector, shows a row vector in $2$ dimensions, joining the origin at $O$ to the coordinate $(x,y)=(2,3)$ at $A$.

Wikipedia Diagram of (2,3) vector from
<https://en.wikipedia.org/wiki/Euclidean_vector>

This vector has components of length $2$ in the $x$ direction and of length $3$ in the $y$ direction. The overall length of the vector is then $\sqrt{2^2+3^2} = \sqrt{13}\approx 3.605$ by Pythagoras's theorem. This might for example represent a person cycling approximately north-east at $3.6\,\mathrm{km/h}$ - or $2\,\mathrm{km/h}$ east and, simultaneously, $3\,\mathrm{km/h}$ north.
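As a quick sanity check (numpy is introduced properly below), here is a minimal sketch of that length calculation in python:

import numpy as np

# the components of the vector joining O to A
v = np.array([2, 3])

# Pythagorean length: sqrt(2^2 + 3^2) = sqrt(13)
length = np.sqrt(np.sum(v**2))
print(length)   # about 3.6055512754639896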


Note that the coordinate $(2,3)$ at $A$ in the diagram could easily be confused with a row vector. Such overloading of notation is common in maths, and usually context makes it clear what is intended.

This shouldn't happen for us though, because from now on we will always work with column vectors, and switch between column and row forms using the transpose operation. The transpose of a vector is denoted with a superscript $T$ and swaps a row into a column and vice-versa. For example,

$$ \boldsymbol{v}^T = \left(\begin{array}{r}3 \\ -2 \\ 1\end{array}\right), \qquad \boldsymbol{b}^T = (6, -3, 2.5, -1, 0). $$

python: Binder, Anaconda and Jupyter¶

We will use binder, and then the anaconda distribution to access python and the libraries we need. The coding itself will be carried out in Jupyter notebooks. We'll go through this in an early lab session so you can get started with 'hands on' machine learning.

Using numpy to represent vectors¶

The numpy module (or library) is the main tool for scientific computing in python. It stands for numerical python, and it will be a key tool for us. See https://numpy.org

We load in the numpy package and abbreviate it with np as follows. This syntax is very standard. You can use something other than np if you like, but you'll be swimming against the tide.

In [107]:
import numpy as np

Now we can set up two vectors as numpy arrays, and print them out, as follows,

In [108]:
v = np.array([3,-2,1])
b = np.array([ [6], [-3], [2.5], [-1], [0]])
print('v = ', v, ' and b = ', b)
v =  [ 3 -2  1]  and b =  [[ 6. ]
 [-3. ]
 [ 2.5]
 [-1. ]
 [ 0. ]]

This looks a bit messy - let's try again, forcing a line break

In [109]:
print(v, '\n', b)
[ 3 -2  1] 
 [[ 6. ]
 [-3. ]
 [ 2.5]
 [-1. ]
 [ 0. ]]

This is a bit better - you can see how numpy handles row and column vectors.

We'll often not worry about the distinction between row and column vectors when using numpy. It's easier (i.e. less typing) to set up the row vector above, and we'll often take that route, although when we write vectors mathematically we will always use column vectors.

Using numpy for transpose.¶

We can write b.T for $\boldsymbol{b}^T$, but the overall effect is a bit unexpected.

In [110]:
print('v = ', v.T, ' and b = ', b.T)
v =  [ 3 -2  1]  and b =  [[ 6.  -3.   2.5 -1.   0. ]]

It's a bit hard to see what is going on - the key thing to remember is that these objects are arrays in computer memory, and not mathematical vectors.

You can get the behaviour you expect with this.

In [111]:
v = np.array([[3,-2,1]])
print(v.T)
[[ 3]
 [-2]
 [ 1]]

Alternatively, you can force the shape by using the shape attribute - take a look at these... (note that # is used to write comments)

In [112]:
# this gives a list of numbers.
a = np.array([3, -2, 1])
print(a)
# ask for the shape - it is just (3,)
a.shape
# force the shape to be 3-row by 1-column 
a.shape = (3,1)
print(a)
# now print the transpose
print(a.T)
[ 3 -2  1]
[[ 3]
 [-2]
 [ 1]]
[[ 3 -2  1]]

Here is a different approach...

In [113]:
# force b to have one row - a row vector
b = np.array([[3, -2, 1]])
print(b)
print('The shape of b is ', b.shape)
# and then transpose it to get a  column vector
b = np.array([[3, -2, 1]]).T
print(b)
[[ 3 -2  1]]
The shape of b is  (1, 3)
[[ 3]
 [-2]
 [ 1]]

For a bit more discussion see e.g. https://stackoverflow.com/questions/17428621/python-differentiating-between-row-and-column-vectors

We won't have to worry too much about these subtle things - the python libraries that we will use will take care of all of this bookkeeping.

Addition and Subtraction¶

Vectors of the same shape can be added or subtracted, component by component. For example, forming $\boldsymbol{g}=\boldsymbol{a}-\boldsymbol{p}$ with

$$ \boldsymbol{a} = \left(\begin{array}{r} 3\\ -2\\ 1 \end{array}\right) \text{ and } \boldsymbol{p} = \left(\begin{array}{r} 5\\ 2\\ -10 \end{array}\right) \text{ then gives } \boldsymbol{g} = \left(\begin{array}{r} 3-5\\ -2-2\\ 1-(-10) \end{array}\right) = \left(\begin{array}{r} -2\\ -4\\ 11 \end{array}\right). $$

You can check that $\boldsymbol{a}=\boldsymbol{g}+\boldsymbol{p}$, as we would expect.

A visual demonstration of this addition process can be accessed here: https://www.geogebra.org/m/hm4haajh in two dimensions, and here https://www.geogebra.org/m/drvu2f66 in three dimensions. The idea is similar in higher dimensions but harder to draw.

Vector - scalar multiplication¶

A vector can be multiplied by a scalar just by multiplying each element of the vector by that same scalar. For example:

$$ \text{if }\qquad \boldsymbol{y} = \left(\begin{array}{r} -3 \\ 16 \\ 1 \\ 1089 \\ 15 \end{array}\right) \qquad\text{ then }\qquad -3\boldsymbol{y} = \left(\begin{array}{r} 9 \\ -48 \\ -3 \\ -3267 \\ -45 \end{array}\right). $$

Using numpy for vector calculations¶

We'll set up the vectors $\boldsymbol{a}$ and $\boldsymbol{p}$ given above as numpy arrays and then show how to do these operations in python.

In [114]:
a = np.array([3, -2, 1])
p = np.array([5, 2, -10])
g = a-p
print(g)
a = g+p
print(a)
[-2 -4 11]
[ 3 -2  1]
In [115]:
y = np.array([-3, 16, 1, 1089, 15])
z = -3*y
print(z)
[    9   -48    -3 -3267   -45]

Vector Norms¶

In mathematics the word norm is used to denote the size of something. Depending on what that 'something' is, its 'size' can be an 'obvious' property, or much more abstract. The most obvious way to measure the size of a vector is to use its length. We'll start by examining that, and then move on to more general notions.

The Vector $2$-norm ($\ell_2$, or Euclidean, or Pythagorean, distance)¶

In general, for a vector $\boldsymbol{v}\in\mathbb{R}^n$ (a point belonging to $n$-dimensional space), with $n$ components $v_1$, $v_2$, $\ldots$, $v_n$, we denote its Pythagorean (or Euclidean) length by the so-called $2$-norm:

$$ \Vert\boldsymbol{v}\Vert_2 = \sqrt{v_1^2 + v_2^2 +\cdots+v_n^2}. $$

If you visualize the $2$-norm you will probably think of the 'as the crow flies' distance between any two points $A$ and $B$.

Example:¶

Suppose we have this vector

$$ \boldsymbol{u} = \left(\begin{array}{r} -3 \\ 2 \\ 4 \\ -1 \end{array}\right) $$

Then,

$$ \Vert\boldsymbol{u}\Vert_2 = \sqrt{(-3)^2 + 2^2 + 4^2 + (-1)^2 } = \sqrt{9+4+16+1} = \sqrt{30} \approx 5.477\ldots $$

Let's see how to do this with numpy. We'll use the linear algebra submodule, https://numpy.org/doc/stable/reference/routines.linalg.html, and the norm function, https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html.

In [116]:
u = np.array([-3, 2, 4, -1])
print('||u||_2 = ', np.linalg.norm(u))
# We can also specify the '2'
print('||u||_2 = ', np.linalg.norm(u, 2))
||u||_2 =  5.477225575051661
||u||_2 =  5.477225575051661

The Vector $p$-norm ($\ell_p$, or Minkowski, norms)¶

Being able to specify the power/root of $2$ is useful because there are other norms corresponding to other values of the power and root.

More generally, we can use the $p$-norm for any $p\ge 1$ where

$$ \Vert\boldsymbol{v}\Vert_p = \left\{\begin{array}{ll}\displaystyle \sqrt[p]{\vert v_1\vert^p + \vert v_2\vert^p +\cdots+\vert v_n\vert^p}, &\text{if } 1\le p < \infty; \\ \max\{\vert v_k\vert\colon k=1,2,\ldots,n\}, &\text{if } p = \infty. \end{array}\right. $$

These norms will be very useful to us in the applications we study later. Often the $p$-norm will also be referred to as the $\ell_p$ norm.
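For instance, here is a small sketch (not part of the original examples) checking the definition for $p=3$ against numpy's norm routine, which accepts a general ord value for vectors:

import numpy as np

u = np.array([-3, 2, 4, -1])

# 3-norm straight from the definition: cube root of the sum of |u_k|^3
p = 3
manual = np.sum(np.abs(u)**p)**(1/p)

# the same quantity via numpy
builtin = np.linalg.norm(u, p)

print(manual, builtin)   # both are the cube root of 100, about 4.6416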

Note that $p<1$ is not allowed in this definition. That doesn't, however, stop some casual usage in which the formula above is applied with $p<1$, so that quantities such as $\Vert\boldsymbol{v}\Vert_{1/2}$, an '$\ell_{1/2}$ norm', get used as if they were norms.

Strictly speaking these aren't norms when $p<1$ (see [Chap. 3, MML], https://mml-book.github.io), although in practice these quantities can be useful. We could call them phoney norms.

An extreme example is the $\ell_0$ norm. This gives the number of non-zero elements in a vector. It is not a norm, but is nevertheless useful when sparsity is of interest.
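As a sketch of that idea, the count of non-zero entries can be obtained with numpy's count_nonzero function (and, as we'll see below, np.linalg.norm also accepts 0 for vectors):

import numpy as np

v = np.array([1, 0, -2, 0, 3])

# the 'l0 norm': the number of non-zero entries
print(np.count_nonzero(v))     # 3
print(np.linalg.norm(v, 0))    # 3.0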

Apart from the Euclidean/Pythagorean $2$-norm that we saw above, the $1$-norm and the $\infty$-norm are also of importance.

The Vector $1$-norm ($\ell_1$, Manhattan, or taxicab, distance)¶

The $1$-norm is often referred to as the Manhattan distance because (in 2D) it measures the distance travelled when we can only move from point $A$ to point $B$ along the coordinate directions. This can be an 'L-shape' or any number of staircase paths. See, for example, https://en.wikipedia.org/wiki/Taxicab_geometry

Wikipedia Diagram of the Manhattan distance in 2D
<https://en.wikipedia.org/wiki/Taxicab_geometry>

This is akin to how one moves from one point to another in the street-grid system in Manhattan, either on foot or in a taxi.
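To make this concrete, here is a minimal sketch, with made-up grid coordinates, of the taxicab distance as the $1$-norm of the difference of two points:

import numpy as np

# two made-up locations on a street grid: (blocks east, blocks north)
A = np.array([1, 2])
B = np.array([4, 6])

# Manhattan distance: |4-1| + |6-2| = 7 blocks
print(np.linalg.norm(B - A, 1))   # 7.0

# compare with the 'as the crow flies' 2-norm: sqrt(3^2 + 4^2) = 5
print(np.linalg.norm(B - A, 2))   # 5.0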

The Vector $\infty$-norm ($\ell_\infty$, 'max', or Chebychev, norm)¶

This doesn't really measure the distance from $A$ to $B$ but instead just records the largest (in absolute value) length along the coordinate directions.

Example:¶

Suppose we have this vector

$$ \boldsymbol{w} = \left(\begin{array}{r} 3 \\ -2 \\ -4 \\ 1 \end{array}\right) $$

Then,

$$ \Vert\boldsymbol{w}\Vert_\infty = \max\{\vert w_k\vert\colon k=1,2,3,4\} = 4 $$
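A quick numpy check of this (a sketch; the worksheet's own norm calculations follow in the next example) - the $\infty$-norm is just the largest absolute component:

import numpy as np

w = np.array([3, -2, -4, 1])

# infinity norm: the largest absolute value among the components
print(np.max(np.abs(w)))           # 4
print(np.linalg.norm(w, np.inf))   # 4.0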

Example¶

Let's work some more examples by hand and then with numpy. Let,

$$ \boldsymbol{w} = \big( -19, 18, 2, 0, 0, -8, 34, 0, -57 \big)^T $$

Then

$$ \Vert\boldsymbol{w}\Vert_2 = \sqrt{361+324+4+0+0+64+1156+0+3249} = \sqrt{5158} \approx 71.819\ldots $$

Also,

$$ \Vert\boldsymbol{w}\Vert_1 = 19+18+2+0+0+8+34+0+57 = 138, \qquad \Vert\boldsymbol{w}\Vert_\infty = 57 \qquad\text{ and }\qquad \Vert\boldsymbol{w}\Vert_0 = 6 $$

Let's see these in numpy.

In [117]:
w = np.array([-19, 18, 2, 0, 0, -8, 34, 0, -57])
print('||w||_2   = ', np.linalg.norm(w,2))
print('||w||_1   = ', np.linalg.norm(w,1))
print('||w||_inf = ', np.linalg.norm(w,np.inf))  # note how we denote infinity
print('||w||_0   = ', np.linalg.norm(w,0))
||w||_2   =  71.8192174839019
||w||_1   =  138.0
||w||_inf =  57.0
||w||_0   =  6.0

Some data - data as vectors¶

Let's now look at some data. Just as before, in the following cell we import seaborn and look at the names of the built-in data sets. The seaborn library, https://seaborn.pydata.org, is designed for data visualization. It uses matplotlib, https://matplotlib.org, which is a graphics library for python.

More detail on the datasets can be found here: https://github.com/mwaskom/seaborn-data/blob/master/README.md

If you want to dig deeper, you can look at https://blog.enterprisedna.co/how-to-load-sample-datasets-in-python/ and https://github.com/mwaskom/seaborn-data for the background - but you don't need to.

The first part of the following material we have seen before. This is a recap.

In [118]:
import seaborn as sns
# we can now refer to the seaborn library functions using 'sns'
# note that you can use another character string - but 'sns' is standard.

# Now let's get the names of the built-in data sets.
sns.get_dataset_names()

# type SHIFT=RETURN to execute the highlighted (active) cell
Out[118]:
['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

The taxis data set¶

In [119]:
# let's take a look at 'taxis'
dft = sns.load_dataset('taxis')
# this just plots the first few lines of the data
dft.head()
Out[119]:
pickup dropoff passengers distance fare tip tolls total color payment pickup_zone dropoff_zone pickup_borough dropoff_borough
0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 0.0 12.95 yellow credit card Lenox Hill West UN/Turtle Bay South Manhattan Manhattan
1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 0.0 9.30 yellow cash Upper West Side South Upper West Side South Manhattan Manhattan
2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 0.0 14.16 yellow credit card Alphabet City West Village Manhattan Manhattan
3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 6.15 0.0 36.95 yellow credit card Hudson Sq Yorkville West Manhattan Manhattan
4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 1.10 0.0 13.40 yellow credit card Midtown East Yorkville West Manhattan Manhattan

Recall that what we are seeing here is a data frame.

It is furnished by the pandas library: https://pandas.pydata.org which is used by the seaborn library to store its example data sets.

In this, the variable dft is a pandas data frame: dft = data frame taxi

Each row of the data frame corresponds to a single data point, which we could also call an observation or measurement (depending on context).

Each column (except the left-most) corresponds to a feature of the data point. The first column is just an index giving the row number. Note that this index starts at zero - so, for example, the third row will be labelled/indexed as $2$. Be careful of this - it can be confusing.

The head and tail functions are useful because they attempt to make the data set readable. If you try a raw print then the output is much less friendly.

In [120]:
# in this, the variable dft is a pandas data frame: dft = data frame taxis
print(dft)
                   pickup              dropoff  passengers  distance  fare  \
0     2019-03-23 20:21:09  2019-03-23 20:27:24           1      1.60   7.0   
1     2019-03-04 16:11:55  2019-03-04 16:19:00           1      0.79   5.0   
2     2019-03-27 17:53:01  2019-03-27 18:00:25           1      1.37   7.5   
3     2019-03-10 01:23:59  2019-03-10 01:49:51           1      7.70  27.0   
4     2019-03-30 13:27:42  2019-03-30 13:37:14           3      2.16   9.0   
...                   ...                  ...         ...       ...   ...   
6428  2019-03-31 09:51:53  2019-03-31 09:55:27           1      0.75   4.5   
6429  2019-03-31 17:38:00  2019-03-31 18:34:23           1     18.74  58.0   
6430  2019-03-23 22:55:18  2019-03-23 23:14:25           1      4.14  16.0   
6431  2019-03-04 10:09:25  2019-03-04 10:14:29           1      1.12   6.0   
6432  2019-03-13 19:31:22  2019-03-13 19:48:02           1      3.85  15.0   

       tip  tolls  total   color      payment            pickup_zone  \
0     2.15    0.0  12.95  yellow  credit card        Lenox Hill West   
1     0.00    0.0   9.30  yellow         cash  Upper West Side South   
2     2.36    0.0  14.16  yellow  credit card          Alphabet City   
3     6.15    0.0  36.95  yellow  credit card              Hudson Sq   
4     1.10    0.0  13.40  yellow  credit card           Midtown East   
...    ...    ...    ...     ...          ...                    ...   
6428  1.06    0.0   6.36   green  credit card      East Harlem North   
6429  0.00    0.0  58.80   green  credit card                Jamaica   
6430  0.00    0.0  17.30   green         cash    Crown Heights North   
6431  0.00    0.0   6.80   green  credit card          East New York   
6432  3.36    0.0  20.16   green  credit card            Boerum Hill   

                          dropoff_zone pickup_borough dropoff_borough  
0                  UN/Turtle Bay South      Manhattan       Manhattan  
1                Upper West Side South      Manhattan       Manhattan  
2                         West Village      Manhattan       Manhattan  
3                       Yorkville West      Manhattan       Manhattan  
4                       Yorkville West      Manhattan       Manhattan  
...                                ...            ...             ...  
6428              Central Harlem North      Manhattan       Manhattan  
6429  East Concourse/Concourse Village         Queens           Bronx  
6430                    Bushwick North       Brooklyn        Brooklyn  
6431      East Flatbush/Remsen Village       Brooklyn        Brooklyn  
6432                   Windsor Terrace       Brooklyn        Brooklyn  

[6433 rows x 14 columns]
In [121]:
# seaborn makes visualization easy - here is a scatter plot of the data.
sns.scatterplot(data=dft, x="distance", y="fare")
Out[121]:
<AxesSubplot:xlabel='distance', ylabel='fare'>
In [122]:
# here's another example
sns.scatterplot(data=dft, x="pickup_borough", y="tip")
Out[122]:
<AxesSubplot:xlabel='pickup_borough', ylabel='tip'>
In [123]:
# is the tip proportional to the fare?
sns.scatterplot(data=dft, x="fare", y="tip")
Out[123]:
<AxesSubplot:xlabel='fare', ylabel='tip'>
In [124]:
sns.scatterplot(data=dft, x="distance", y="tip")
Out[124]:
<AxesSubplot:xlabel='distance', ylabel='tip'>

Data as Vectors¶

Each row of the data set above gives the specific feature values for one particular observation, or measurement. This is a single data point.

We can get the names of the features using dft.columns as follows...

In [125]:
dft.columns
Out[125]:
Index(['pickup', 'dropoff', 'passengers', 'distance', 'fare', 'tip', 'tolls',
       'total', 'color', 'payment', 'pickup_zone', 'dropoff_zone',
       'pickup_borough', 'dropoff_borough'],
      dtype='object')

In this case, for each data point:

  • the observation, or measurement, is a single taxi ride.
  • the features of that data point are:
    • 'pickup'
    • 'dropoff'
    • 'passengers'
    • 'distance'
    • 'fare'
    • 'tip'
    • 'tolls'
    • 'total'
    • 'color'
    • 'payment'
    • 'pickup_zone'
    • 'dropoff_zone'
    • 'pickup_borough'
    • 'dropoff_borough'

Look again at the first six entries of the data set

In [126]:
dft.head(6)
Out[126]:
pickup dropoff passengers distance fare tip tolls total color payment pickup_zone dropoff_zone pickup_borough dropoff_borough
0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 0.0 12.95 yellow credit card Lenox Hill West UN/Turtle Bay South Manhattan Manhattan
1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 0.0 9.30 yellow cash Upper West Side South Upper West Side South Manhattan Manhattan
2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 0.0 14.16 yellow credit card Alphabet City West Village Manhattan Manhattan
3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 6.15 0.0 36.95 yellow credit card Hudson Sq Yorkville West Manhattan Manhattan
4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 1.10 0.0 13.40 yellow credit card Midtown East Yorkville West Manhattan Manhattan
5 2019-03-11 10:37:23 2019-03-11 10:47:31 1 0.49 7.5 2.16 0.0 12.96 yellow credit card Times Sq/Theatre District Midtown East Manhattan Manhattan
  • The first column can be ignored - that is just a label for each observation and has nothing to do with the taxi ride data.

  • The pickup and dropoff columns are dates and times; we'll ignore these for now, but we will come back to them in the lab session.

  • The next six columns are numbers; these will fit nicely into elements one to six of a list of numbers.

  • We'll also ignore the remaining columns, and so we have arrived at a way of representing each data point as a vector.

Let's work through an example of how to do this.

First, note that dft.iat[0,0] will tell us what is in the first position of the first row. Again BEWARE - indexing starts at zero. This means for example that dft.iat[5,7] tells us what is in the eighth column of the sixth row.

An alternative is to use the fact that dft.loc[5] refers to the entire sixth row, while dft.loc[5].iat[7] refers to the eighth element of the sixth row.

We can see all of these pieces of information with a print statement. Note the use of \n to get new lines.

In [127]:
print('dft.iat[5,7]      = ', dft.iat[5,7])
print('dft.loc[5].iat[7] = ', dft.loc[5].iat[7],'\n')
print('dft.loc[5] = ')
print(dft.loc[5])
dft.iat[5,7]      =  12.96
dft.loc[5].iat[7] =  12.96 

dft.loc[5] = 
pickup                   2019-03-11 10:37:23
dropoff                  2019-03-11 10:47:31
passengers                                 1
distance                                0.49
fare                                     7.5
tip                                     2.16
tolls                                      0
total                                  12.96
color                                 yellow
payment                          credit card
pickup_zone        Times Sq/Theatre District
dropoff_zone                    Midtown East
pickup_borough                     Manhattan
dropoff_borough                    Manhattan
Name: 5, dtype: object

Let's see how we can store the numerical values for a given data point (row) in a vector. The idea is just to use an array and fill it using the methods we have just seen.

Let's remind ourselves of the first few rows and store the six numerical column values (features) of the third row in a vector.

We'll need to import numpy if we haven't already.

In [128]:
dft.head(3)
Out[128]:
pickup dropoff passengers distance fare tip tolls total color payment pickup_zone dropoff_zone pickup_borough dropoff_borough
0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 0.0 12.95 yellow credit card Lenox Hill West UN/Turtle Bay South Manhattan Manhattan
1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 0.0 9.30 yellow cash Upper West Side South Upper West Side South Manhattan Manhattan
2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 0.0 14.16 yellow credit card Alphabet City West Village Manhattan Manhattan
In [129]:
import numpy as np
r3 = np.array([dft.iat[2,2],dft.iat[2,3],dft.iat[2,4],dft.iat[2,5],dft.iat[2,6],dft.iat[2,7]])
print(r3)
[ 1.    1.37  7.5   2.36  0.   14.16]

Too much typing? Here is a faster way...

dft.iloc[2,2:8] refers to the third row (indexed with 2), and columns $3$ to $8$ (indexed as 2 to 7).

In [130]:
r3 = np.array(dft.iloc[2,2:8])
print(r3)
[1 1.37 7.5 2.36 0.0 14.16]

In dft.iloc[2,2:8] the first 2 refers to the third row. The slice 2:8 uses the starting value 2 to refer to the third column, and the colon : means continue on from 2 in steps of 1, giving the sequence 2, 3, 4, .... The 8 tells the sequence to stop at 7, because the stop value itself is excluded.

If you are confused and annoyed that 2:8 gives 2, 3, 4, 5, 6, 7 and not 2, 3, 4, 5, 6, 7, 8 then, rest assured, you are not alone. A tiny illustration follows.
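This has nothing to do with the taxi data itself - it just shows python's slice convention:

import numpy as np

# the stop value in a slice is excluded, so 2:8 produces 2, 3, 4, 5, 6, 7
print(list(range(2, 8)))   # [2, 3, 4, 5, 6, 7]

# the same convention applies to numpy arrays and to pandas .iloc
x = np.arange(10)
print(x[2:8])              # [2 3 4 5 6 7]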

Review¶

We have just come a long way:

  • we reviewed the mathematical notion of a vector.
  • we saw how using numpy in python we could
    • create vectors;
    • add and subtract them, and multiply by a scalar;
    • compute various vector norms and phoney norms.

Furthermore

  • we saw how to access the toy datasets in seaborn.
  • how to work with pandas data frames.
  • how to extract data frame values.
  • how to represent a data point as a vector of features.

We will be building extensively on these skills in the coming weeks.

Taking raw data and manipulating it so that it is in a form suitable for analysis is often referred to as Data Wrangling. The pandas cheat sheet here https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf gives lots of examples of how to work with data frames.

For now we finish off with a look at a few more of the toy datasets that seaborn provides. They are called toy because they are realistic enough to use when learning techniques and tools in data science, but also small enough to get answers in real time.

The tips data set¶

Let's look again now at the tips data set.

We will load the data using the variable name dftp, for data frame tips.

Note that we could use dft, the same name as above, but that would overwrite the previous 'value/meaning' of dft. This may or may not be what you want.

In [131]:
dftp = sns.load_dataset('tips')
dftp.head()
Out[131]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

An extensive list of data frame methods/functions can be found here: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html#pandas.DataFrame - we have seen some of them. Let's look at some more...

This will give us basic information on the data set. (Note that we type dftp.info below without parentheses, so python displays the method object along with a preview of the frame; calling dftp.info() would print a column-by-column summary instead.)

In [132]:
print(dftp.info)
<bound method DataFrame.info of      total_bill   tip     sex smoker   day    time  size
0         16.99  1.01  Female     No   Sun  Dinner     2
1         10.34  1.66    Male     No   Sun  Dinner     3
2         21.01  3.50    Male     No   Sun  Dinner     3
3         23.68  3.31    Male     No   Sun  Dinner     2
4         24.59  3.61  Female     No   Sun  Dinner     4
..          ...   ...     ...    ...   ...     ...   ...
239       29.03  5.92    Male     No   Sat  Dinner     3
240       27.18  2.00  Female    Yes   Sat  Dinner     2
241       22.67  2.00    Male    Yes   Sat  Dinner     2
242       17.82  1.75    Male     No   Sat  Dinner     2
243       18.78  3.00  Female     No  Thur  Dinner     2

[244 rows x 7 columns]>

A quick glance tells us that there are $7$ columns of features, and $244$ data points.

We can confirm these numbers with shape, while size tells us how many values are stored in total (rows times columns).

In [133]:
print('The shape of the data frame is: ', dftp.shape)
print('The size of the data frame is: ', dftp.size)
print('Note that 244*7 =', 244*7)
The shape of the data frame is:  (244, 7)
The size of the data frame is:  1708
Note that 244*7 = 1708

One way to get a quick overview of the data is to plot the numerical values.

In [134]:
dftp.plot()
Out[134]:
<AxesSubplot:>

We can get summary statistics like this:

In [135]:
dftp.describe()
Out[135]:
total_bill tip size
count 244.000000 244.000000 244.000000
mean 19.785943 2.998279 2.569672
std 8.902412 1.383638 0.951100
min 3.070000 1.000000 1.000000
25% 13.347500 2.000000 2.000000
50% 17.795000 2.900000 2.000000
75% 24.127500 3.562500 3.000000
max 50.810000 10.000000 6.000000

And we can get more detailed quantile information like this

In [136]:
dftp.quantile(q = 0.95, numeric_only=True) # OK in binder, Jan 2025.
#dftp.quantile(0.95) # this didn't work in binder as of jan 2024.
Out[136]:
total_bill    38.0610
tip            5.1955
size           4.0000
Name: 0.95, dtype: float64
In [137]:
# alternatives - with thanks to Kevon Brown (MSc student 2023-24)
print(dftp['total_bill'].quantile(0.95))
print(dftp['tip'].quantile(0.95))
print(dftp['size'].quantile(0.95))
38.061
5.1955
4.0

We can also produce scatter plots

In [138]:
sns.scatterplot(data=dftp, x="total_bill", y="tip")
Out[138]:
<AxesSubplot:xlabel='total_bill', ylabel='tip'>

Exercises¶

For the Anscombe data set:

  1. Which of the summary statistics for $x$ are the same or similar for each subset?
  2. Which of the summary statistics for $y$ are the same or similar for each subset?

Look at the diamonds data set

  1. How many diamonds are listed there? How many attributes does each have?
  2. Scatter plot price against carat.
Hints for the diamonds exercise:

  1. ds = sns.load_dataset('diamonds'); ds.shape gives (53940, 10), so there are 53940 diamonds, each with 10 attributes.
  2. sns.scatterplot(data=ds, x="carat", y="price")