In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')
In [2]:
# Load the salary dataset; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer='Salary_Data.csv')
In [3]:
# Preview the first five rows to check the columns and value ranges.
df.head(n=5)
Out[3]:
YearsExperience | Salary | |
---|---|---|
0 | 1.1 | 39343.0 |
1 | 1.3 | 46205.0 |
2 | 1.5 | 37731.0 |
3 | 2.0 | 43525.0 |
4 | 2.2 | 39891.0 |
In [4]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()
RangeIndex: 30 entries, 0 to 29 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 YearsExperience 30 non-null float64 1 Salary 30 non-null float64 dtypes: float64(2) memory usage: 608.0 bytes
In [5]:
# Count missing values per column (isnull is an alias of isna).
df.isnull().sum()
Out[5]:
YearsExperience 0 Salary 0 dtype: int64
The data is clean: neither column contains missing values.
In [6]:
# Describe: summary statistics for each column
In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()
Out[7]:
YearsExperience | Salary | |
---|---|---|
count | 30.000000 | 30.000000 |
mean | 5.313333 | 76003.000000 |
std | 2.837888 | 27414.429785 |
min | 1.100000 | 37731.000000 |
25% | 3.200000 | 56720.750000 |
50% | 4.700000 | 65237.000000 |
75% | 7.700000 | 100544.750000 |
max | 10.500000 | 122391.000000 |
In [8]:
# Distribution of years of experience: density-normalised histogram with a KDE overlay.
# `sns.distplot` is deprecated and scheduled for removal from seaborn; this histplot call
# is the documented replacement for distplot's default histogram + KDE rendering
# (`cut=3` keeps the KDE extending past the data range as distplot did).
sns.histplot(df['YearsExperience'], kde=True, stat='density', kde_kws=dict(cut=3))
Out[8]:
Similar Notebooks
- introducción a programación
- complex data types in python working with lists and tuples in python code along
- intro to python code along
- lecture 1
- complex data types in python working with dictionaries sets in python
- numpy day
- 1 usingpythonasacalculator
- 1 pythonconditions
- complex data types in python shallow deep copies in python
- 4 listslicingandlistoperations
Copyright © Code Fetcher 2022