[SQL]: cloning Instagram Database
SQL
09/30/2019
Common entities on Instagram: USERS, PHOTOS, COMMENTS, HASHTAGS, LIKES, FOLLOWERS/FOLLOWEES, ...
Tables
CREATE TABLE users (    id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,    username VARCHAR(255) NOT NULL UNIQUE,    created_at TIMESTAMP DEFAULT NOW());
CREATE TABLE photos (    id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,    image_url VARCHAR(255) NOT NULL,    user_id INT NOT NULL,    created_at TIMESTAMP DEFAULT NOW(),    FOREIGN KEY (user_id) REFERENCES users(id));CREATE TABLE comments (    id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,    comment_text VARCHAR(255),    photo_id INT NOT NULL,    user_id INT NOT NULL,    created_at TIMESTAMP DEFAULT NOW(),    FOREIGN KEY (user_id) REFERENCES users(id),    FOREIGN KEY (photo_id) REFERENCES photos(id));CREATE TABLE likes (    user_id INT NOT NULL,    photo_id INT NOT NULL,    created_at TIMESTAMP DEFAULT NOW(),    FOREIGN KEY(user_id) REFERENCES users(id),    FOREIGN KEY(photo_id) REFERENCES photos(id),    PRIMARY KEY(user_id, photo_id));CREATE TABLE follows (    follower_id INT NOT NULL,    followee_id INT NOT NULL,    created_at TIMESTAMP DEFAULT NOW(),    FOREIGN KEY(follower_id) REFERENCES users(id),    FOREIGN KEY(followee_id) REFERENCES users(id),    PRIMARY KEY(follower_id, followee_id));
--CREATE TABLE tags (    id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,    tag_name VARCHAR(255) UNIQUE,    created_at TIMESTAMP DEFAULT NOW());CREATE TABLE photo_tags (    photo_id INT NOT NULL,    tag_id INT NOT NULL,    FOREIGN KEY(photo_id) REFERENCES photos(id),    FOREIGN KEY(tag_id) REFERENCES tags(id),    PRIMARY KEY(photo_id, tag_id));- It's possible to make username a primary key, but it may be slow when string is too long. Better practice to still include the id 
- PRIMARY KEY(user_id, photo_id) in likes prevent from a user to make a multiple likes 
- PRIMARY KEY(follower_id, followee_id) - to prevent following oneself- THIS actually does not prevent! Try using trigger to prevent this.
 
- PRIMARY KEY(photo_id, tag_id) in photo_tags table prevents multiple instances of photo tag 
Hash tag implementation
1. Creating tag column in photos
+----+----------+--------------+---------------------------------+| id | url      | caption      | tags                            |+----+----------+--------------+---------------------------------+|  1 | /dociv32 | Madison      | #capitol#summer#omg#daily       ||  2 | /39dksje | My dog       | #dog#frese#joy#happy            ||  3 | /kfeiw2  | selfie today | #selfietoday#happy#sad#handsome |+----+----------+--------------+---------------------------------+Easy to implement. However, this design..
- Limits # tags that can be stored
- Each tag can't have additional information stored
- Searching for tags can be a hassle
2. Using 2 tables
photos:+----+----------+--------------+| id | url      | caption      |+----+----------+--------------+|  1 | /dociv32 | Madison      ||  2 | /39dksje | My dog       ||  3 | /kfeiw2  | selfie today |+----+----------+--------------+
tags:+-----------+----------+| tag_name  | photo_id |+-----------+----------+| #happy    |        2 || #happy    |        3 || #TGIF     |        2 || #TBT      |        1 || #TBT      |        3 |+-----------+----------+Unlimited # of tags, but this is slower than first solution because it has duplicated strings over and over.
3. Using 3 tables (preferred)
photos:+----+----------+--------------+| id | url      | caption      |+----+----------+--------------+|  1 | /dociv32 | Madison      ||  2 | /39dksje | My dog       ||  3 | /kfeiw2  | selfie today |+----+----------+--------------+
tags:+----+-----------+| id | tag_name  |+----+-----------+|  1 | #happy    ||  2 | #handsome ||  3 | #TGIF     ||  4 | #TBT      ||  5 | #Daily    ||  6 | #FTW      |+----+-----------+
photo_tags:+----------+--------+| photo_id | tag_id |+----------+--------+|        1 |      2 ||        1 |      1 ||        3 |      2 ||        2 |      3 ||        2 |      6 ||        3 |      1 ||        3 |      4 ||        3 |      5 |+----------+--------+Unlimited # of tags and can add additional information
However, this requires more work when inserting/updating and have to worry about orphans; when deleting photos also need to remove all associated information
Read more about  schema performance test 
Work with data
Insert data from  file.sql (currently not available)
Find the 5 oldest users
SELECT username, created_atFROM usersORDER BY created_at LIMIT 5;+------------------+---------------------+| username         | created_at          |+------------------+---------------------+| Darby_Herzog     | 2016-05-06 00:14:21 || Emilio_Bernier52 | 2016-05-06 13:04:30 || Elenor88         | 2016-05-08 01:30:41 || Nicole71         | 2016-05-09 17:30:22 || Jordyn.Jacobson2 | 2016-05-14 07:56:26 |+------------------+---------------------+What day do most users sign up on
- Get day name from created_at
SELECT username, created_at,    DAYNAME(created_at) AS dayFROM users;- Get DAYNAME & COUNT with GROUP BY
SELECT DAYNAME(created_at) AS day,    COUNT(DAYNAME(created_at)) AS cntFROM usersGROUP BY day;+-----------+-----+| day       | cnt |+-----------+-----+| Friday    |  15 || Monday    |  14 || Saturday  |  12 || Sunday    |  16 || Thursday  |  16 || Tuesday   |  14 || Wednesday |  13 |+-----------+-----+- LIMIT 1 (or 2)
SELECT DAYNAME(created_at) AS dayFROM usersGROUP BY dayORDER BY COUNT(DAYNAME(created_at)) DESCLIMIT 1;+----------+| day      |+----------+| Thursday |+----------+Find users who's never posted
- users Left Join photos
SELECT username, image_urlFROM users LEFT JOIN photos    ON users.id = photos.user_id;- Condition statement where image_url = NULL
SELECT username, image_urlFROM usersLEFT JOIN photos    ON users.id = photos.user_idWHERE photos.id IS NULL;+---------------------+-----------+| username            | image_url |+---------------------+-----------+| Aniya_Hackett       | NULL      || Bartholome.Bernhard | NULL      || Bethany20           | NULL      || Darby_Herzog        | NULL      || David.Osinski47     | NULL      || Duane60             | NULL      || Esmeralda.Mraz57    | NULL      || Esther.Zulauf61     | NULL      || Franco_Keebler64    | NULL      || Hulda.Macejkovic    | NULL      || Jaclyn81            | NULL      || Janelle.Nikolaus81  | NULL      || Jessyca_West        | NULL      || Julien_Schmidt      | NULL      || Kasandra_Homenick   | NULL      || Leslie67            | NULL      || Linnea59            | NULL      || Maxwell.Halvorson   | NULL      || Mckenna17           | NULL      || Mike.Auer39         | NULL      || Morgan.Kassulke     | NULL      || Nia_Haag            | NULL      || Ollie_Ledner37      | NULL      || Pearl7              | NULL      || Rocio33             | NULL      || Tierra.Trantow      | NULL      |+---------------------+-----------+Find who got the most likes on a photo
- Get # likes for each photo
SELECT photo_id, COUNT(likes.photo_id)FROM likesGROUP BY photo_id;- ORDER BY & LIMIT
SELECT photo_id, COUNT(likes.photo_id) as likesFROM likesGROUP BY photo_idORDER BY likes DESC LIMIT 1;+----------+-------+| photo_id | likes |+----------+-------+|      145 |    48 |+----------+-------+- Get username who posted photo_id=145
SELECT username, likes.photo_id,    COUNT(likes.photo_id) AS likesFROM usersJOIN photos ON photos.user_id = users.idJOIN likes ON likes.photo_id = photos.idGROUP BY likes.photo_idORDER BY likes DESC LIMIT 1;+---------------+----------+-------+| username      | photo_id | likes |+---------------+----------+-------+| Zack_Kemmer93 |      145 |    48 |+---------------+----------+-------+How many times does average user post
- Total # photos, Total # user
SELECT COUNT(photo.id) FROM photos;SELECT COUNT(users.id) FROM users;+-----------+| COUNT(id) |+-----------+|       257 |+-----------++-----------------+| COUNT(users.id) |+-----------------+|             100 |+-----------------+- Divide
SELECT(SELECT COUNT(photos.id) FROM photos) /(SELECT COUNT(users.id) FROM users) AS res;+--------+| res    |+--------+| 2.5700 |+--------+Use sub-query without JOIN
Get 5 most commonly used hash tags
- Get number of tags
SELECT COUNT(photo_tags.tag_id) num_tagsFROM photo_tagsGROUP BY photo_tags.tag_idORDER BY num_tags DESC LIMIT 5;+----------+| num_tags |+----------+|       59 ||       42 ||       39 ||       38 ||       24 |+----------+- Join to get name of tag
SELECT tag_name, COUNT(photo_tags.tag_id) num_tagsFROM photo_tags    JOIN tags on tags.id = photo_tags.tag_idGROUP BY photo_tags.tag_idORDER BY num_tags DESC LIMIT 5;+----------+----------+| tag_name | num_tags |+----------+----------+| smile    |       59 || beach    |       42 || party    |       39 || fun      |       38 || concert  |       24 |+----------+----------+Get users who have liked every single photo
- Take a look at the likes table
$ DESC likes;+------------+-----------+------+-----+-------------------+-------+| Field      | Type      | Null | Key | Default           | Extra |+------------+-----------+------+-----+-------------------+-------+| user_id    | int(11)   | NO   | PRI | NULL              |       || photo_id   | int(11)   | NO   | PRI | NULL              |       || created_at | timestamp | NO   |     | CURRENT_TIMESTAMP |       |+------------+-----------+------+-----+-------------------+-------+- Get number of likes for each user
SELECT username, COUNT(*) as num_likesFROM users JOIN likes ON    users.id = likes.user_idGROUP BY users.id;- Get #photos
SELECT COUNT(photos.id) AS num_photos FROM photos;+------------+| num_photos |+------------+|        257 |+------------+- Select users with HAVING #likes = #photos
SELECT username, COUNT(*) as num_likesFROM users JOIN likes ON    users.id = likes.user_idGROUP BY users.idHAVING num_likes =    (        SELECT COUNT(photos.id) FROM photos    );- Solution w/o using sub-query
SELECT username, COUNT(*) as num_likesFROM users JOIN likes ON    users.id = likes.user_idGROUP BY users.idHAVING num_likes = MAX(likes.photo_id);