## Python Data Structures Challenge: Netflix Content Analysis

### The Dataset: Netflix Shows & Movies

In [2]:
import pandas as pd

# In-memory dataset: one tuple per title, with nine positional fields in this order:
#   title, type, country, year, rating, duration, genres, imdb_score, description
# Note that `year` ("2016") and `duration` ("81 min") are stored as strings,
# `genres` is a list of strings, and `imdb_score` is a float.
netflix_data = [
    ("Stranger Things", "TV Show", "United States", "2016", "TV-14", "81 min", ["Drama", "Fantasy", "Horror"], 8.7, "Four friends uncover supernatural mysteries"),
    ("The Crown", "TV Show", "United Kingdom", "2016", "TV-MA", "58 min", ["Biography", "Drama", "History"], 8.6, "British royal family drama"),
    ("Money Heist", "TV Show", "Spain", "2017", "TV-MA", "70 min", ["Action", "Crime", "Mystery"], 8.3, "Bank heist with a master plan"),
    ("Bird Box", "Movie", "United States", "2018", "R", "124 min", ["Drama", "Horror", "Sci-Fi"], 6.6, "Survive with your eyes closed"),
    ("Roma", "Movie", "Mexico", "2018", "R", "135 min", ["Drama"], 7.7, "1970s Mexico City family story"),
    ("The Irishman", "Movie", "United States", "2019", "R", "209 min", ["Biography", "Crime", "Drama"], 7.8, "Mafia hitman's life story"),
    ("Squid Game", "TV Show", "South Korea", "2021", "TV-MA", "60 min", ["Action", "Drama", "Mystery"], 8.0, "Deadly childhood games"),
    ("Don't Look Up", "Movie", "United States", "2021", "R", "138 min", ["Comedy", "Drama", "Sci-Fi"], 7.2, "Comet threatens Earth"),
    ("Bridgerton", "TV Show", "United States", "2020", "TV-MA", "60 min", ["Drama", "Romance"], 7.3, "Regency era romance"),
    ("Tiger King", "TV Show", "United States", "2020", "TV-MA", "47 min", ["Biography", "Crime", "Documentary"], 7.6, "Exotic animal park owner"),
    ("Extraction", "Movie", "Australia", "2020", "R", "116 min", ["Action", "Thriller"], 6.7, "Rescue mission in Bangladesh"),
    ("The Queen's Gambit", "TV Show", "United States", "2020", "TV-MA", "60 min", ["Drama"], 8.5, "Chess prodigy's rise to fame"),
    ("Ozark", "TV Show", "United States", "2017", "TV-MA", "60 min", ["Crime", "Drama", "Thriller"], 8.4, "Money laundering for cartel"),
    ("Dark", "TV Show", "Germany", "2017", "TV-MA", "60 min", ["Crime", "Drama", "Mystery", "Sci-Fi"], 8.8, "Time travel mystery"),
    ("The Witcher", "TV Show", "United States", "2019", "TV-MA", "60 min", ["Action", "Adventure", "Drama", "Fantasy"], 8.2, "Monster hunter's destiny"),
    ("Mindhunter", "TV Show", "United States", "2017", "TV-MA", "60 min", ["Crime", "Drama", "Thriller"], 8.6, "FBI behavioral analysis unit"),
    ("Klaus", "Movie", "Spain", "2019", "PG", "96 min", ["Adventure", "Animation", "Comedy", "Family"], 8.2, "Postal worker meets Santa"),
    ("I Am Mother", "Movie", "Australia", "2019", "TV-MA", "113 min", ["Drama", "Mystery", "Sci-Fi", "Thriller"], 6.7, "AI raises human daughter"),
    ("The Platform", "Movie", "Spain", "2019", "TV-MA", "94 min", ["Horror", "Sci-Fi", "Thriller"], 7.0, "Vertical prison social experiment"),
    ("Enola Holmes", "Movie", "United States", "2020", "PG-13", "123 min", ["Adventure", "Crime", "Drama", "Mystery"], 6.6, "Sherlock's sister solves mysteries")
]

In [13]:
# Column labels, one per field of each `netflix_data` tuple and in the same order;
# passed as `columns=` when the pandas DataFrame is built further down.
columns = ["title", "type", "country", "year", "rating", "duration", "genres_list", "imdb_score", "description"]

Questions:

- Q1: What's the average IMDb rating for US content vs international content?
- Q2: Which country produces the highest-rated content on average?
- Q3: What are the top 3 most popular genres, and what's their average rating?
- Q4: Are TV shows generally rated higher than movies?

### Strategy 1: Just Loops and Lists

In [4]:
# Q1 What's the average IMDb rating for US content vs international content?

# Split the IMDb scores into two buckets by country of origin.
us_ratings = []
international_ratings = []

# Each row is a 9-field tuple; only `country` and `imdb` are used here, but the
# full unpacking documents the row layout.
for title, type_, country, year, rating, duration, genres, imdb, desc in netflix_data:
    if country == "United States":
        us_ratings.append(imdb)
    else:
        international_ratings.append(imdb)

# Average each bucket; fall back to NaN for an empty bucket so the cell never
# raises ZeroDivisionError if the dataset changes.
us_avg = sum(us_ratings) / len(us_ratings) if us_ratings else float("nan")
international_avg = (
    sum(international_ratings) / len(international_ratings)
    if international_ratings
    else float("nan")
)

print(f"US average IMDb rating:            {us_avg:.2f} ({len(us_ratings)} titles)")
print(f"International average IMDb rating: {international_avg:.2f} ({len(international_ratings)} titles)")


In [5]:
# Q2 Which country produces the highest-rated content on average?

In [6]:
# Q3 What are the top 3 most popular genres, and what's their average rating?

In [7]:
# Q4: Are TV shows generally rated higher than movies?

### Strategy 2: Object-Oriented with dataclasses

In [9]:
from dataclasses import dataclass
from collections import Counter, defaultdict

@dataclass
class NetflixShow:
    """One Netflix title, built positionally from a 9-field dataset row.

    The field order matches the tuple layout of the dataset, so
    ``NetflixShow(*row)`` works for any row.
    """

    title: str
    type: str  # "TV Show" or "Movie" in the dataset
    country: str
    # Annotated as int, but the dataset supplies strings such as "2016" and a
    # dataclass does not convert values; is_recent() accepts either form.
    year: int
    rating: str  # content rating, e.g. "TV-MA"
    duration: str  # free text such as "124 min"
    genres: list[str]
    imdb_score: float
    description: str

    def is_us_content(self) -> bool:
        """Return True when ``country`` is exactly "United States"."""
        return self.country == "United States"

    def is_recent(self, cutoff_year: int = 2020) -> bool:
        """Return True when the title was released in or after ``cutoff_year``.

        ``year`` is converted with ``int()`` before comparing because the
        dataset stores it as a string; comparing a str with an int would
        raise TypeError.

        Raises:
            ValueError: if ``year`` cannot be parsed as an integer.
        """
        return int(self.year) >= cutoff_year

    def duration_minutes(self) -> int:
        """Return the leading number of ``duration`` as an int ("124 min" -> 124).

        Raises:
            ValueError: if ``duration`` is blank or does not start with an integer.
        """
        parts = self.duration.split()
        if not parts:
            raise ValueError(f"empty duration: {self.duration!r}")
        return int(parts[0])

def analyze_with_dataclasses(data):
    """Answer Q1-Q4 of the notebook using ``NetflixShow`` objects.

    Args:
        data: iterable of 9-field rows in ``NetflixShow`` field order.

    Returns:
        A dict with four entries:
          * ``"us_vs_international"``: ``{"US": avg, "International": avg}``
            (a key is present only if that group has at least one title).
          * ``"top_country"``: ``(country, avg)`` for the country with the
            highest average IMDb score, or ``None`` when ``data`` is empty.
          * ``"top_genres"``: up to three ``(genre, title_count, avg)`` tuples,
            most frequent genre first.
          * ``"type_averages"``: ``{type: avg}``, e.g. for "TV Show" and "Movie".
    """
    shows = [NetflixShow(*item) for item in data]

    def _average(scores):
        # Only ever called on lists that received at least one append.
        return sum(scores) / len(scores)

    origin_scores = defaultdict(list)
    country_scores = defaultdict(list)
    type_scores = defaultdict(list)
    genre_scores = defaultdict(list)
    genre_counts = Counter()

    for show in shows:
        # Compare the field directly so the grouping does not depend on the
        # helper methods of the class.
        origin = "US" if show.country == "United States" else "International"
        origin_scores[origin].append(show.imdb_score)
        country_scores[show.country].append(show.imdb_score)
        type_scores[show.type].append(show.imdb_score)
        for genre in show.genres:
            genre_counts[genre] += 1
            genre_scores[genre].append(show.imdb_score)

    country_averages = {
        country: _average(scores) for country, scores in country_scores.items()
    }
    # default=None keeps max() from raising on an empty dataset.
    top_country = max(
        country_averages.items(), key=lambda pair: pair[1], default=None
    )

    top_genres = [
        (genre, count, _average(genre_scores[genre]))
        for genre, count in genre_counts.most_common(3)
    ]

    return {
        "us_vs_international": {
            origin: _average(scores) for origin, scores in origin_scores.items()
        },
        "top_country": top_country,
        "top_genres": top_genres,
        "type_averages": {
            kind: _average(scores) for kind, scores in type_scores.items()
        },
    }

# Sanity check: build one NetflixShow from the first dataset row and display it
# as the cell's output. The repr shows year='2016' because the dataclass stores
# the dataset's string as-is, despite the `int` annotation.
stranger_things_row = netflix_data[0]
stranger_things = NetflixShow(*stranger_things_row)
stranger_things


NetflixShow(title='Stranger Things', type='TV Show', country='United States', year='2016', rating='TV-14', duration='81 min', genres=['Drama', 'Fantasy', 'Horror'], imdb_score=8.7, description='Four friends uncover supernatural mysteries')

In [11]:
# Q1 What's the average IMDb rating for US content vs international content?

In [None]:
# Q2 Which country produces the highest-rated content on average?

In [None]:
# Q3 What are the top 3 most popular genres, and what's their average rating?


In [None]:
# Q4: Are TV shows generally rated higher than movies?


### Strategy 3: Pandas Power-User
Leverage pandas for fast aggregations and grouping.

In [14]:
import pandas as pd
# Build the DataFrame directly from the list of tuples, labelling each tuple
# position with the matching entry of `columns`.
df = pd.DataFrame(netflix_data, columns=columns)
df.head()  # preview the first five rows

Unnamed: 0,title,type,country,year,rating,duration,genres_list,imdb_score,description
0,Stranger Things,TV Show,United States,2016,TV-14,81 min,"[Drama, Fantasy, Horror]",8.7,Four friends uncover supernatural mysteries
1,The Crown,TV Show,United Kingdom,2016,TV-MA,58 min,"[Biography, Drama, History]",8.6,British royal family drama
2,Money Heist,TV Show,Spain,2017,TV-MA,70 min,"[Action, Crime, Mystery]",8.3,Bank heist with a master plan
3,Bird Box,Movie,United States,2018,R,124 min,"[Drama, Horror, Sci-Fi]",6.6,Survive with your eyes closed
4,Roma,Movie,Mexico,2018,R,135 min,[Drama],7.7,1970s Mexico City family story


In [None]:
# Q1 What's the average IMDb rating for US content vs international content?


In [None]:
# Q2 Which country produces the highest-rated content on average?


In [None]:
# Q3 What are the top 3 most popular genres, and what's their average rating?


In [None]:
# Q4: Are TV shows generally rated higher than movies?
