import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

columns = ["POP_EST", "GDP_MD", "CONTINENT", "SUBREGION", "geometry"]
countries = gpd.read_file("ne_110m_admin_0_countries.shp").set_index("NAME")[columns]
countries

populations = countries.groupby("CONTINENT")["POP_EST"].sum()
populations

CONTINENT
Africa                     1.306370e+09
Antarctica                 4.490000e+03
Asia                       4.550277e+09
Europe                     7.454125e+08
North America              5.837560e+08
Oceania                    4.120487e+07
Seven seas (open ocean)    1.400000e+02
South America              4.270667e+08
Name: POP_EST, dtype: float64

type(populations)

pandas.core.series.Series

populations.plot()

<Axes: xlabel='CONTINENT'>

countries.groupby("CONTINENT")["POP_EST"].sum()

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f7a2d6d09d0>

# 1. Rename groupby to dissolve
# 2. Specify the aggfunc as a string like 'sum' in the dissolve after choosing how to dissolve by
#    The aggfunc tells dissolve how to combine values in columns other than geometry
# 3. We also recommend subsetting your data at a good opportunity
# dissolve has an interesting default of combining on the geometries
countries[["CONTINENT", "POP_EST", "geometry"]].dissolve("CONTINENT", "sum").plot(column="POP_EST")

<Axes: >

pd.read_csv("earthquakes.csv").plot()

<Axes: >

earthquakes["longitude"]

id
nc72666881               -121.619
us20006i0y                94.5721
nc72666891    -118.85916670000002
nc72666896    -118.99483329999998
nn00553447               -119.845
                     ...         
nc72685246           -121.0998306
ak13879193              -149.8627
nc72685251           -122.8215027
ci37672328           -118.6353333
ci37672360    -116.93366670000002
Name: longitude, Length: 8394, dtype: object

earthquakes = gpd.read_file("earthquakes.csv").set_index("id")
earthquakes

earthquakes.plot()

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 1
----> 1 earthquakes.plot()

File /opt/conda/lib/python3.11/site-packages/pandas/plotting/_core.py:1030, in PlotAccessor.__call__(self, *args, **kwargs)
   1027             label_name = label_kw or data.columns
   1028             data.columns = label_name
-> 1030 return plot_backend.plot(data, kind=kind, **kwargs)

File /opt/conda/lib/python3.11/site-packages/pandas/plotting/_matplotlib/__init__.py:71, in plot(data, kind, **kwargs)
     69         kwargs["ax"] = getattr(ax, "left_ax", ax)
     70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
     72 plot_obj.draw()
     73 return plot_obj.result

File /opt/conda/lib/python3.11/site-packages/pandas/plotting/_matplotlib/core.py:499, in MPLPlot.generate(self)
    497 @final
    498 def generate(self) -> None:
--> 499     self._compute_plot_data()
    500     fig = self.fig
    501     self._make_plot(fig)

File /opt/conda/lib/python3.11/site-packages/pandas/plotting/_matplotlib/core.py:698, in MPLPlot._compute_plot_data(self)
    696 # no non-numeric frames or series allowed
    697 if is_empty:
--> 698     raise TypeError("no numeric data to plot")
    700 self.data = numeric_data.apply(type(self)._convert_to_ndarray)

TypeError: no numeric data to plot

earthquakes = pd.read_csv("earthquakes.csv").set_index("id")
earthquakes = gpd.GeoDataFrame(
    earthquakes,
    # crs="EPSG:4326" specifies WGS84 or GPS coordinate system, see https://epsg.io/4326
    geometry=gpd.points_from_xy(earthquakes["longitude"], earthquakes["latitude"], crs="EPSG:4326")
)
earthquakes

fig, ax = plt.subplots(figsize=(13, 5))
countries.plot(ax=ax, color="#EEE")
earthquakes.plot(ax=ax, column="magnitude", markersize=0.1, legend=True)
ax.set(title="Earthquakes between July 27, 2016 and August 25, 2016")
ax.set_axis_off()

fig, ax = plt.subplots()
na_countries = countries[countries["CONTINENT"] == "North America"]
na_countries.plot(ax=ax, color="#EEE")
earthquakes[earthquakes["name"] == "Washington"].plot(ax=ax, column="magnitude", markersize=0.1)
ax.set(title="Earthquakes in Washington")
ax.set_axis_off()

na_countries.bounds

na_countries.total_bounds

array([-171.7911106 ,    7.22054149,  -12.20855   ,   83.64513   ])

earthquakes_na_bounds = earthquakes.clip(na_countries.total_bounds)
earthquakes_na_bounds

fig, ax = plt.subplots()
na_countries = countries[countries["CONTINENT"] == "North America"]
na_countries.plot(ax=ax, color="#EEE")
earthquakes_na_bounds.plot(ax=ax, column="magnitude", markersize=0.1)
ax.set(title="Earthquakes in North America (over land or sea)")
ax.set_axis_off()

earthquakes_na_countries = earthquakes.sjoin(na_countries)
# Result has all the columns from earthquakes (incl geometry)
#   and all the columns from countries (except geometry)
# Lots of duplicated rows for the country United States of America
#   sjoin operation is looking for where your earthquake falls in the countries dataset
earthquakes_na_countries

# how="left" keeps all the earthquakes even if there is no match in na_countries!
earthquakes.sjoin(na_countries, how="left")

# how="right" keeps all the countries even if there is no match in earthquakes!
#   emphasizes using the "right-side dataset" as the index and the geometry
earthquakes.sjoin(na_countries, how="right")

# outer is not allowed since it might be confusing what the index or geometry should be in the result
earthquakes.sjoin(na_countries, how="outer")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[40], line 1
----> 1 earthquakes.sjoin(na_countries, how="outer")

File /opt/conda/lib/python3.11/site-packages/geopandas/geodataframe.py:2391, in GeoDataFrame.sjoin(self, df, *args, **kwargs)
   2307 def sjoin(self, df, *args, **kwargs):
   2308     """Spatial join of two GeoDataFrames.
   2309 
   2310     See the User Guide page :doc:`../../user_guide/mergingdata` for details.
   (...)
   2389     sjoin : equivalent top-level function
   2390     """
-> 2391     return geopandas.sjoin(left_df=self, right_df=df, *args, **kwargs)

File /opt/conda/lib/python3.11/site-packages/geopandas/tools/sjoin.py:114, in sjoin(left_df, right_df, how, predicate, lsuffix, rsuffix, distance, on_attribute, **kwargs)
    110     raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")
    112 on_attribute = _maybe_make_list(on_attribute)
--> 114 _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute),
    116 indices = _geom_predicate_query(
    117     left_df, right_df, predicate, distance, on_attribute=on_attribute
    118 )
    120 joined, _ = _frame_join(
    121     left_df,
    122     right_df,
   (...)
    129     on_attribute=on_attribute,
    130 )

File /opt/conda/lib/python3.11/site-packages/geopandas/tools/sjoin.py:175, in _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute)
    173 allowed_hows = ["left", "right", "inner"]
    174 if how not in allowed_hows:
--> 175     raise ValueError(
    176         '`how` was "{}" but is expected to be in {}'.format(how, allowed_hows)
    177     )
    179 if not _check_crs(left_df, right_df):
    180     _crs_mismatch_warn(left_df, right_df, stacklevel=4)

ValueError: `how` was "outer" but is expected to be in ['left', 'right', 'inner']

fig, ax = plt.subplots()
na_countries = countries[countries["CONTINENT"] == "North America"]
na_countries.plot(ax=ax, color="#EEE")
earthquakes_na_countries.plot(ax=ax, column="magnitude", markersize=0.1)
ax.set(title="Earthquakes in North America (over land)")
ax.set_axis_off()

movies = pd.DataFrame([
    {"movie_id": 51, "movie_name": "Lady Bird", "year": 2017, "director_id": 23},
    {"movie_id": 47, "movie_name": "Grand Budapest Hotel", "year": 2014, "director_id": 16},
    {"movie_id": 103, "movie_name": "Parasite", "year": 2019, "director_id": 14},
    {"movie_id": 34, "movie_name": "Frozen", "year": 2013, "director_id": 18},
    {"movie_id": 37, "movie_name": "Moonrise Kingdom", "year": 2012, "director_id": 16},
])
movies

directors = pd.DataFrame([
    {"director_id": 14, "director_name": "Bong Joon Ho"},
    {"director_id": 23, "director_name": "Greta Gerwig"},
    {"director_id": 16, "director_name": "Wes Anderson"},
    {"director_id": 21, "director_name": "Quentin Tarantino"},
    {"director_id": 27, "director_name": "Kathryn Bigelow"},
])
directors

# Default to how="inner"
movies.merge(directors, on="director_id")

!pip install -q folium mapclassify

[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: pip install --upgrade pip

earthquakes.explore(column="magnitude")

earthquakes

countries.explore(column="GDP_MD")

	POP_EST	GDP_MD	CONTINENT	SUBREGION	geometry
NAME
Fiji	889953.0	5496	Oceania	Melanesia	MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ...
Tanzania	58005463.0	63177	Africa	Eastern Africa	POLYGON ((33.90371 -0.95, 34.07262 -1.05982, 3...
W. Sahara	603253.0	907	Africa	Northern Africa	POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
Canada	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
United States of America	328239523.0	21433226	North America	Northern America	MULTIPOLYGON (((-122.84 49, -120 49, -117.0312...
...	...	...	...	...	...
Serbia	6944975.0	51475	Europe	Southern Europe	POLYGON ((18.82982 45.90887, 18.82984 45.90888...
Montenegro	622137.0	5542	Europe	Southern Europe	POLYGON ((20.0707 42.58863, 19.80161 42.50009,...
Kosovo	1794248.0	7926	Europe	Southern Europe	POLYGON ((20.59025 41.85541, 20.52295 42.21787...
Trinidad and Tobago	1394973.0	24269	North America	Caribbean	POLYGON ((-61.68 10.76, -61.105 10.89, -60.895...
S. Sudan	11062113.0	11998	Africa	Eastern Africa	POLYGON ((30.83385 3.50917, 29.9535 4.1737, 29...

	minx	miny	maxx	maxy
NAME
Canada	-140.997780	41.675105	-52.648099	83.233240
United States of America	-171.791111	18.916190	-66.964660	71.357764
Haiti	-74.458034	18.030993	-71.624873	19.915684
Dominican Rep.	-71.945112	17.598564	-68.317943	19.884911
Bahamas	-78.980000	23.710000	-77.000000	27.040000
Greenland	-73.297000	60.036760	-12.208550	83.645130
Mexico	-117.127760	14.538829	-86.811982	32.720830
Panama	-82.965783	7.220541	-77.242566	9.611610
Costa Rica	-85.941725	8.225028	-82.546196	11.217119
Nicaragua	-87.668493	10.726839	-83.147219	15.016267
Honduras	-89.353326	12.984686	-83.147219	16.005406
El Salvador	-90.095555	13.149017	-87.723503	14.424133
Guatemala	-92.229249	13.735338	-88.225023	17.819326
Belize	-89.229122	15.886938	-88.106813	18.499982
Puerto Rico	-67.242428	17.946553	-65.591004	18.520601
Jamaica	-78.337719	17.701116	-76.199659	18.524218
Cuba	-84.974911	19.855481	-74.178025	23.188611
Trinidad and Tobago	-61.950000	10.000000	-60.895000	10.890000

	year	month	day	latitude	longitude	name	magnitude	geometry
id
pr16228001	2016	8	15	17.0398	-67.6287	Puerto Rico	3.3	POINT (-67.6287 17.0398)
pr16236015	2016	8	23	17.5515	-66.4619	Puerto Rico	2.4	POINT (-66.4619 17.5515)
pr16227004	2016	8	14	17.7280	-67.0207	Puerto Rico	2.5	POINT (-67.0207 17.728)
pr16226007	2016	8	13	17.7597	-66.8927	Puerto Rico	2.6	POINT (-66.8927 17.7597)
pr16226009	2016	8	13	17.7644	-66.9978	Puerto Rico	2.3	POINT (-66.9978 17.7644)
...	...	...	...	...	...	...	...	...
ak13835344	2016	8	7	69.3298	-144.0389	Alaska	1.4	POINT (-144.0389 69.3298)
ak13875730	2016	8	23	69.5927	-143.5277	Alaska	2.3	POINT (-143.5277 69.5927)
ak13859105	2016	8	20	69.6423	-144.8114	Alaska	1.9	POINT (-144.8114 69.6423)
ak13839206	2016	8	9	69.7401	-146.3768	Alaska	1.7	POINT (-146.3768 69.7401)
ak13841660	2016	8	12	70.7787	-145.8338	Alaska	2.6	POINT (-145.8338 70.7787)

	year	month	day	latitude	longitude	name	magnitude	geometry	NAME	POP_EST	GDP_MD	CONTINENT	SUBREGION
id
nc72666881	2016	7	27	37.672333	-121.619000	California	1.43	POINT (-121.619 37.67233)	United States of America	328239523.0	21433226	North America	Northern America
nc72666891	2016	7	27	37.576500	-118.859167	California	0.06	POINT (-118.85917 37.5765)	United States of America	328239523.0	21433226	North America	Northern America
nc72666896	2016	7	27	37.595833	-118.994833	California	0.40	POINT (-118.99483 37.59583)	United States of America	328239523.0	21433226	North America	Northern America
nn00553447	2016	7	27	39.377500	-119.845000	Nevada	0.30	POINT (-119.845 39.3775)	United States of America	328239523.0	21433226	North America	Northern America
ak13805337	2016	7	27	61.296300	-152.460000	Alaska	1.80	POINT (-152.46 61.2963)	United States of America	328239523.0	21433226	North America	Northern America
...	...	...	...	...	...	...	...	...	...	...	...	...	...
nc72685246	2016	8	25	36.515499	-121.099831	California	2.42	POINT (-121.09983 36.5155)	United States of America	328239523.0	21433226	North America	Northern America
ak13879193	2016	8	25	61.498400	-149.862700	Alaska	1.40	POINT (-149.8627 61.4984)	United States of America	328239523.0	21433226	North America	Northern America
nc72685251	2016	8	25	38.805000	-122.821503	California	1.06	POINT (-122.8215 38.805)	United States of America	328239523.0	21433226	North America	Northern America
ci37672328	2016	8	25	34.308000	-118.635333	California	1.55	POINT (-118.63533 34.308)	United States of America	328239523.0	21433226	North America	Northern America
ci37672360	2016	8	25	34.119167	-116.933667	California	0.89	POINT (-116.93367 34.11917)	United States of America	328239523.0	21433226	North America	Northern America

	year	month	day	latitude	longitude	name	magnitude	geometry	NAME	POP_EST	GDP_MD	CONTINENT	SUBREGION
id
nc72666881	2016	7	27	37.672333	-121.619000	California	1.43	POINT (-121.619 37.67233)	United States of America	328239523.0	21433226.0	North America	Northern America
us20006i0y	2016	7	27	21.514600	94.572100	Burma	4.90	POINT (94.5721 21.5146)	NaN	NaN	NaN	NaN	NaN
nc72666891	2016	7	27	37.576500	-118.859167	California	0.06	POINT (-118.85917 37.5765)	United States of America	328239523.0	21433226.0	North America	Northern America
nc72666896	2016	7	27	37.595833	-118.994833	California	0.40	POINT (-118.99483 37.59583)	United States of America	328239523.0	21433226.0	North America	Northern America
nn00553447	2016	7	27	39.377500	-119.845000	Nevada	0.30	POINT (-119.845 39.3775)	United States of America	328239523.0	21433226.0	North America	Northern America
...	...	...	...	...	...	...	...	...	...	...	...	...	...
nc72685246	2016	8	25	36.515499	-121.099831	California	2.42	POINT (-121.09983 36.5155)	United States of America	328239523.0	21433226.0	North America	Northern America
ak13879193	2016	8	25	61.498400	-149.862700	Alaska	1.40	POINT (-149.8627 61.4984)	United States of America	328239523.0	21433226.0	North America	Northern America
nc72685251	2016	8	25	38.805000	-122.821503	California	1.06	POINT (-122.8215 38.805)	United States of America	328239523.0	21433226.0	North America	Northern America
ci37672328	2016	8	25	34.308000	-118.635333	California	1.55	POINT (-118.63533 34.308)	United States of America	328239523.0	21433226.0	North America	Northern America
ci37672360	2016	8	25	34.119167	-116.933667	California	0.89	POINT (-116.93367 34.11917)	United States of America	328239523.0	21433226.0	North America	Northern America

Dissolve, Intersect, and Join¶

Dissolve: `groupby` for geospatial data¶

Combining tabular and geospatial data¶

Bounding boxes¶

Spatial join¶

Attribute joins¶

Interactive maps¶

	year	month	day	latitude	longitude	name	magnitude
id
nc72666881	2016	7	27	37.6723333	-121.619	California	1.43
us20006i0y	2016	7	27	21.5146	94.5721	Burma	4.9
nc72666891	2016	7	27	37.5765	-118.85916670000002	California	0.06
nc72666896	2016	7	27	37.5958333	-118.99483329999998	California	0.4
nn00553447	2016	7	27	39.3775	-119.845	Nevada	0.3
...	...	...	...	...	...	...	...
nc72685246	2016	8	25	36.51549910000001	-121.0998306	California	2.42
ak13879193	2016	8	25	61.4984	-149.8627	Alaska	1.4
nc72685251	2016	8	25	38.8050003	-122.8215027	California	1.06
ci37672328	2016	8	25	34.308	-118.6353333	California	1.55
ci37672360	2016	8	25	34.1191667	-116.93366670000002	California	0.89

	id	year	month	day	latitude	longitude	name	magnitude	POP_EST	GDP_MD	CONTINENT	SUBREGION	geometry
NAME
Canada	ak13806840	2016.0	7.0	27.0	61.4966	-140.6142	Canada	1.10	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
Canada	ak13831382	2016.0	7.0	27.0	61.4500	-140.6041	Alaska	0.90	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
Canada	ak13831385	2016.0	7.0	27.0	59.7753	-136.5700	Alaska	1.20	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
Canada	uw61172492	2016.0	7.0	27.0	49.4250	-120.5230	Canada	1.94	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
Canada	ak13811717	2016.0	7.0	27.0	61.2486	-139.3301	Canada	1.30	37589262.0	1736425	North America	Northern America	MULTIPOLYGON (((-122.84 49, -122.97421 49.0025...
...	...	...	...	...	...	...	...	...	...	...	...	...	...
Puerto Rico	pr16236006	2016.0	8.0	23.0	18.0020	-67.0385	Puerto Rico	2.10	3193694.0	104988	North America	Caribbean	POLYGON ((-66.28243 18.51476, -65.7713 18.4266...
Puerto Rico	pr16237004	2016.0	8.0	24.0	18.4722	-66.4264	Puerto Rico	2.50	3193694.0	104988	North America	Caribbean	POLYGON ((-66.28243 18.51476, -65.7713 18.4266...
Jamaica	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2948279.0	16458	North America	Caribbean	POLYGON ((-77.5696 18.49053, -76.89662 18.4008...
Cuba	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	11333483.0	100023	North America	Caribbean	POLYGON ((-82.26815 23.18861, -81.40446 23.117...
Trinidad and Tobago	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1394973.0	24269	North America	Caribbean	POLYGON ((-61.68 10.76, -61.105 10.89, -60.895...

	movie_id	movie_name	year	director_id
0	51	Lady Bird	2017	23
1	47	Grand Budapest Hotel	2014	16
2	103	Parasite	2019	14
3	34	Frozen	2013	18
4	37	Moonrise Kingdom	2012	16

	director_id	director_name
0	14	Bong Joon Ho
1	23	Greta Gerwig
2	16	Wes Anderson
3	21	Quentin Tarantino
4	27	Kathryn Bigelow

Dissolve, Intersect, and Join¶

Dissolve: groupby for geospatial data¶

Combining tabular and geospatial data¶

Bounding boxes¶

Spatial join¶

Attribute joins¶

Interactive maps¶

Dissolve: `groupby` for geospatial data¶