CREATE TABLE payroll ( user_id INT PRIMARY KEY, name TEXT, job TEXT, salary INT ); INSERT INTO payroll VALUES (123, 'Jack', 'TA', 50000), (345, 'Allison', 'TA', 60000), (567, 'Magda', 'Prof', 90000), (789, 'Dan', 'Prof', 100000); CREATE TABLE regist ( user_id INT REFERENCES payroll, car TEXT ); INSERT INTO regist VALUES (123, 'Charger'), (567, 'Civic'), (567, 'Pinto'); SELECT * FROM payroll; SELECT * FROM regist; -- avg salary of people who drive a car -- allison gets a pay cut UPDATE payroll SET salary = 50000 WHERE user_id = 345; SELECT * FROM payroll; -- allison buys a tesla INSERT INTO regist VALUES (345, 'Tesla'); SELECT * FROM regist; -- broken version 2 again: SELECT avg(DISTINCT salary) FROM payroll p JOIN regist r ON p.user_id = r.user_id; -- now returns the wrong answer -- intermediate result with names SELECT name, salary FROM payroll p JOIN regist r ON p.user_id = r.user_id; -- fix this query by first computing user_ids who drive WITH drivers AS ( SELECT DISTINCT user_id, salary FROM payroll p, regist r WHERE p.user_id = r.user_id ) SELECT avg(salary) FROM drivers; CREATE VIEW drivers AS SELECT DISTINCT p.* FROM payroll p, regist r WHERE p.user_id = r.user_id; -- give allison her old salary back UPDATE payroll SET salary = 60000 WHERE user_id = 345; SELECT * FROM PAYROLL; -- allison sells the tesla DELETE FROM regist WHERE user_id = 345 AND car = 'Tesla'; SELECT * FROM regist; -- find person with highest salary for each job WITH highest_salaries AS ( SELECT job, max(salary) as highest_salary FROM payroll GROUP BY job ) SELECT hs.job, p.name, p.salary FROM highest_salaries hs, payroll p WHERE hs.job = p.job AND hs.highest_salary = p.salary; -- subquery in from SELECT hs.job, p.name, p.salary FROM ( SELECT job, max(salary) as highest_salary FROM payroll GROUP BY job ) hs, payroll p WHERE hs.job = p.job AND hs.highest_salary = p.salary; -- for each person, find the avg salary of their job -- subquery in select -- "scalar subquery" because it returns one column (and one row) SELECT p1.name, (SELECT avg(p2.salary) FROM payroll p2 WHERE p2.job = p1.job) FROM payroll p1 -- same query unnested SELECT p1.name, avg(p2.salary) FROM payroll p1, payroll p2 WHERE p1.job = p2.job GROUP BY p1.user_id, p1.name -- broken, error: can't have two columns in a subquery in select SELECT p1.name, (SELECT avg(p2.salary), min(p2.salary) FROM payroll p2 WHERE p2.job = p1.job) FROM payroll p1 -- fixed: now each scalar subquery has only one column SELECT p1.name, (SELECT avg(p2.salary) FROM payroll p2 WHERE p2.job = p1.job), (SELECT min(p2.salary) FROM payroll p2 WHERE p2.job = p1.job) FROM payroll p1 -- for each person find the number of cars they drive SELECT p.name, (SELECT count(*) FROM regist r WHERE r.user_id = p.user_id) FROM payroll p -- unnested, broken SELECT p.name, count(r.car) FROM payroll p JOIN regist r ON p.user_id = r.user_id GROUP BY p.user_id, p.name -- unnested, fixed SELECT p.name, count(r.car) FROM payroll p LEFT OUTER JOIN regist r ON p.user_id = r.user_id GROUP BY p.user_id, p.name -- for each person, *list* the cars they drive -- broken: if someone drives more than one car, this query will -- pick an arbitrary single car that they drive to list. -- -- side note: scalar subquery returns NULL if 0 rows. SELECT p.name, (SELECT r.car FROM regist r WHERE p.user_id = r.user_id) FROM payroll p -- must be written unnested SELECT p.name, r.car FROM payroll p, regist r WHERE p.user_id = r.user_id -- for each person, compute a string which is the comma separated list -- of the cars they drive -- -- need a new aggregate function that makes a string -- https://www.sqlite.org/lang_aggfunc.html#group_concat SELECT p.name, (SELECT string_agg(r.car, ',') FROM regist r WHERE r.user_id = p.user_id) as cars FROM payroll p -- unnested SELECT p.name, string_agg(r.car, ',') as cars FROM payroll p, regist r WHERE r.user_id = p.user_id GROUP BY p.user_id -- another string_agg example: -- for each job, list the people with that job SELECT job, string_agg(name, ',') as workers FROM payroll GROUP BY job -- ┌──────┬──────────────┐ -- │ job │ workers │ -- ├──────┼──────────────┤ -- │ Prof │ Magda,Dan │ -- │ TA │ Jack,Allison │ -- └──────┴──────────────┘ -- Note that the lists are in whatever order SQLite decided -- -- We can fix that with a new feature: -- ORDER BY *inside* string_agg SELECT job, string_agg(name, ',' ORDER BY name) as workers FROM payroll GROUP BY job; -- ┌──────┬──────────────┐ -- │ job │ workers │ -- ├──────┼──────────────┤ -- │ Prof │ Dan,Magda │ -- │ TA │ Allison,Jack │ -- └──────┴──────────────┘ -- ORDER BY inside an aggregate function is only supported -- where it affects the result, so only string_agg for us. -- -- bit of a weird feature, goes beyond the relational model, -- but very very useful in practice in my experience. -- Find all employees who earn less than the average of their job SELECT p1.name, p1.salary FROM payroll p1 WHERE p1.salary < (SELECT avg(p2.salary) FROM payroll p2 WHERE p1.job = p2.job); -- unnested SELECT p1.name, p1.salary FROM payroll p1, payroll p2 WHERE p1.job = p2.job GROUP BY p1.name, p1.salary HAVING p1.salary < avg(p2.salary); -- Find people who do drive cars SELECT p.user_id, p.name FROM payroll p WHERE EXISTS ( SELECT * FROM regist r WHERE p.user_id = r.user_id ) -- unnested SELECT DISTINCT p.user_id, p.name FROM payroll p, regist r WHERE p.user_id = r.user_id; -- Find people who do *not* drive cars SELECT p.user_id, p.name FROM payroll p WHERE NOT EXISTS ( SELECT * FROM regist r WHERE p.user_id = r.user_id ) -- unnested broken SELECT DISTINCT p.user_id, p.name FROM payroll p, regist r WHERE p.user_id != r.user_id; -- cannot be unnested! (see slides for proof) -- IN/NOT IN -- Find people who drive cars (again) SELECT p.user_id, p.name FROM payroll p WHERE p.user_id IN ( SELECT r.user_id FROM regist r ); -- Find people who do not drive cars (again) SELECT p.user_id, p.name FROM payroll p WHERE p.user_id NOT IN ( SELECT r.user_id FROM regist r ); -- The query on the right side of IN/NOT IN can have > 1 column -- ie, not necessarily scalar CREATE TABLE courses ( course_id TEXT, title TEXT, year INT, quarter TEXT ); INSERT INTO courses VALUES ('CSE 123', 'intro to programming iii', 2024, 'au'), ('CSE 123', 'intro to programming iii', 2025, 'wi'), ('CSE 123', 'intro to programming iii', 2025, 'sp'), ('CSE 344', 'intro to data management', 2024, 'au'), ('CSE 344', 'intro to data management', 2025, 'wi'), ('CSE 344', 'intro to data management', 2025, 'sp'), ('CSE 444', 'database systems', 2025, 'wi'), ('CSE 444', 'database systems', 2025, 'sp'); CREATE TABLE instructors ( name TEXT, course_id TEXT, year INT, quarter TEXT ); INSERT INTO instructors VALUES ('James', 'CSE 123', 2024, 'au'), ('James', 'CSE 344', 2025, 'wi'), ('Ryan', 'CSE 444', 2025, 'wi'), ('Ryan', 'CSE 444', 2025, 'sp'); SELECT * FROM courses; SELECT * FROM instructors; -- Find the title of all courses James has taught SELECT title FROM courses WHERE (course_id, year, quarter) IN ( SELECT course_id, year, quarter -- nonscalar subquery FROM instructors WHERE name = 'James' )