.mode column .header ON CREATE TABLE Payroll ( UserID INT PRIMARY KEY, Name VARCHAR(256), Job VARCHAR(256), Salary INT); INSERT INTO Payroll VALUES (123, 'Jack', 'TA', 50000), (345, 'Allison', 'TA', 60000), (567, 'Magda', 'Prof', 90000), (789, 'Dan', 'Prof', 100000); CREATE TABLE Regist ( UserID INT REFERENCES Payroll, Car VARCHAR(256)); INSERT INTO Regist VALUES (123, 'Charger'), (567, 'Civic'), (567, 'Pinto'); -------------------------- -- Recap SELECT count(*) as c FROM payroll WHERE job = 'TA'; SELECT count(*) as c, avg(Salary) as a FROM payroll WHERE job = 'TA'; SELECT Job, count(*) FROM Payroll GROUP BY Job; SELECT Job, count(*) FROM Payroll WHERE Salary > 55000 GROUP BY Job; SELECT Job, count(*) FROM Payroll WHERE Salary > 75000 GROUP BY Job; -------------------------- -- Empty groups SELECT P.Name, count(*) FROM Payroll P, Regist R WHERE P.UserID = R.UserID GROUP by P.UserID, P.Name; SELECT P.Name, count(*) FROM Payroll P LEFT OUTER JOIN Regist R ON P.UserID = R.UserID GROUP by P.UserID, P.Name; SELECT P.Name, count(R.Car) FROM Payroll P LEFT OUTER JOIN Regist R ON P.UserID = R.UserID GROUP by P.UserID, P.Name; SELECT P1.Job, count(DISTINCT P2.UserID) FROM Payroll P1 LEFT OUTER JOIN Payroll P2 ON P1.Job = P2.Job and P2.Salary > 75000 GROUP BY P1.Job; -- we must use P2.Salary > 75000! -- using P1.Salary > 75000 is incorrect -- to see why, let's add a 3rd job, with one salary <75000 and one >75000: INSERT INTO Payroll VALUES (222, 'Joe', 'Lec', 50000), (333, 'Jim', 'Lec', 90000); SELECT * FROM Payroll; -- for the group 'Lec' we expect count = 1 -- but using P1.Salary>75000, we get count = 2: SELECT P1.Job, count(DISTINCT P2.UserID) FROM Payroll P1 LEFT OUTER JOIN Payroll P2 ON P1.Job = P2.Job and P1.Salary > 75000 GROUP BY P1.Job; -- to understand why it is incorrect, look at output to the full query -- make sure you understand how the LEFT OUT JOIN worked: SELECT * FROM Payroll P1 LEFT OUTER JOIN Payroll P2 ON P1.Job = P2.Job and P1.Salary > 75000; -- the correct condition is P2.Salary > 75000: SELECT P1.Job, count(DISTINCT P2.UserID) FROM Payroll P1 LEFT OUTER JOIN Payroll P2 ON P1.Job = P2.Job and P2.Salary > 75000 GROUP BY P1.Job; -- also look at the full query: SELECT * FROM Payroll P1 LEFT OUTER JOIN Payroll P2 ON P1.Job = P2.Job and P2.Salary > 75000; -------------------------- SELECT P1.Job, P2.Name, P2.Salary FROM Payroll AS P1, Payroll AS P2 WHERE P1.Job = P2.Job GROUP BY P1.Job, P2.Name, P2.Salary HAVING MAX(P1.Salary) = P2.Salary; ------- -- average salary of car drivers WITH Cardrivers AS (SELECT DISTINCT P.UserID, P.Salary FROM Payroll P, Regist R WHERE P.UserId=R.UserID) SELECT avg(Salary) FROM Cardrivers; -- CREATE VIEW CarDrivers AS SELECT DISTINCT P.* FROM Payroll P, Regist R WHERE P.UserId=R.UserID; -- Views are computed at query time, with fresh data: SELECT * FROM CarDrivers; -- Update INSERT INTO Regist VALUES (345, 'Tesla'); -- Rerun, and see the update: SELECT * FROM CarDrivers; -- Constrast with materializing query outputs: CREATE TABLE Drivers as SELECT DISTINCT P.* FROM Payroll P, Regist R WHERE P.UserId=R.UserID; -- This query uses stale data: if you update Payroll/Regist the output below doesn't change: SELECT * FROM Drivers; -- Computing the witness using WITH: WITH JobSal AS (SELECT Job, max(Salary) as M FROM Payroll GROUP BY Job) SELECT J.Job, P.Name, P.Salary FROM JobSal J, Payroll P WHERE J.Job = P.Job and J.M = P.Salary; -------------- CREATE TABLE company (name TEXT, revenue INT, year INT); INSERT INTO company VALUES ('ACME', 100000, 1995), ('IBM', 200000, 2012), ('Apple', 300000, 2012), ('IBM', 250000, 2019); SELECT year/10*10 AS Start, year/10*10 + 9 AS End, Name, sum(revenue) FROM company GROUP BY year/10*10, year/10*10 + 9, Name; -- also possible in sqlite: SELECT year/10*10 AS Start, year/10*10 + 9 AS End, Name, sum(revenue) FROM company GROUP BY Start, End, Name; SELECT X.Year, X.Year+9, X.Name, sum(Y.Revenue) FROM Company X, Company Y WHERE X.Name = Y.Name and X.Year <= Y.Year and Y.Year < X.Year+10 GROUP BY X.Year, X.Year+9, X.Name ORDER BY X.Year;