# !pip install pandas
import pandas as pd
df = pd.read_csv('nyc_weather.csv')
df
type(df)
pandas.core.frame.DataFrame
df['Temperature']
df[['Temperature']]
type(df[['Temperature']])
pandas.core.frame.DataFrame
type(df['Temperature']) # 1d values or single column
pandas.core.series.Series
df['Temperature'][0]
38
type(df['Temperature'][0]) # 1 byte = 8 bits | 8 bytes = 64 bits
numpy.int64
df.Temperature
df['Temperature'].max()
50
df['Temperature'].mean()
34.67741935483871
import numpy as np
np.NaN
nan
16 + np.NaN
nan
df.shape
(31, 11)
df.size
341
df.ndim
2
df.head()
EST Temperature DewPoint Humidity Sea Level PressureIn \\\\
0 1/1/2016 38 23 52 30.03
1 1/2/2016 36 18 46 30.02
2 1/3/2016 40 21 47 29.86
3 1/4/2016 25 9 44 30.05
4 1/5/2016 20 -3 41 30.57
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \\\\
0 10 8.0 0 5 NaN
1 10 7.0 0 3 NaN
2 10 8.0 0 1 NaN
3 10 9.0 0 3 NaN
4 10 5.0 0 0 NaN
WindDirDegrees
0 281
1 275
2 277
3 345
4 333
df.head(3)
EST Temperature DewPoint Humidity Sea Level PressureIn \\\\
0 1/1/2016 38 23 52 30.03
1 1/2/2016 36 18 46 30.02
2 1/3/2016 40 21 47 29.86
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \\\\
0 10 8.0 0 5 NaN
1 10 7.0 0 3 NaN
2 10 8.0 0 1 NaN
WindDirDegrees
0 281
1 275
2 277
df.tail()
EST Temperature DewPoint Humidity Sea Level PressureIn \\\\
26 1/27/2016 41 22 45 30.03
27 1/28/2016 37 20 51 29.90
28 1/29/2016 36 21 50 29.58
29 1/30/2016 34 16 46 30.01
30 1/31/2016 46 28 52 29.90
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \\\\
26 10 7.0 T 3 Rain
27 10 5.0 0 1 NaN
28 10 8.0 0 4 NaN
29 10 7.0 0 0 NaN
30 10 5.0 0 0 NaN
WindDirDegrees
26 311
27 234
28 298
29 257
30 241
df[1:6]
EST Temperature DewPoint Humidity Sea Level PressureIn \\\\
1 1/2/2016 36 18 46 30.02
2 1/3/2016 40 21 47 29.86
3 1/4/2016 25 9 44 30.05
4 1/5/2016 20 -3 41 30.57
5 1/6/2016 33 4 35 30.50
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \\\\
1 10 7.0 0 3 NaN
2 10 8.0 0 1 NaN
3 10 9.0 0 3 NaN
4 10 5.0 0 0 NaN
5 10 4.0 0 0 NaN
WindDirDegrees
1 275
2 277
3 345
4 333
5 259
df[5:11][['Temperature', 'Humidity']]
Temperature Humidity
5 33 35
6 39 33
7 39 64
8 44 77
9 50 71
10 33 37
df[['Temperature','Humidity']][ df['Temperature'] == 30 ]
Temperature Humidity
13 30 47
df[['Temperature','Humidity']][ (df['Temperature'] == 30) & (df['Humidity'] == 47) ]
Temperature Humidity
13 30 47
df[ (df['Temperature'] == 30) & (df['Humidity'] == 47) ]
EST Temperature DewPoint Humidity Sea Level PressureIn \\\\
13 1/14/2016 30 12 47 29.95
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \\\\
13 10 5.0 T 7 NaN
WindDirDegrees
13 266
df.columns
Index(['EST', 'Temperature', 'DewPoint', 'Humidity', 'Sea Level PressureIn',
'VisibilityMiles', 'WindSpeedMPH', 'PrecipitationIn', 'CloudCover',
'Events', 'WindDirDegrees'],
dtype='object')
df['EST']
0 1/1/2016
1 1/2/2016
2 1/3/2016
3 1/4/2016
4 1/5/2016
5 1/6/2016
6 1/7/2016
7 1/8/2016
8 1/9/2016
9 1/10/2016
10 1/11/2016
11 1/12/2016
12 1/13/2016
13 1/14/2016
14 1/15/2016
15 1/16/2016
16 1/17/2016
17 1/18/2016
18 1/19/2016
19 1/20/2016
20 1/21/2016
21 1/22/2016
22 1/23/2016
23 1/24/2016
24 1/25/2016
25 1/26/2016
26 1/27/2016
27 1/28/2016
28 1/29/2016
29 1/30/2016
30 1/31/2016
Name: EST, dtype: object
type(df['EST'][0])
str
weather_data = {
'day': ['1/1/2023', '1/2/2023', '1/3/2023', '1/4/2023','1/5/2023', '1/6/2023'],
'temperature': [32,35,28,24,32,38],
'windspeed': [6,7,8,2,4,3],
'event': ['Rain', 'Sunny', 'Snow', 'Rain', 'Sunny', 'Snow']
}
df = pd.DataFrame(weather_data)
df
day temperature windspeed event
0 1/1/2023 32 6 Rain
1 1/2/2023 35 7 Sunny
2 1/3/2023 28 8 Snow
3 1/4/2023 24 2 Rain
4 1/5/2023 32 4 Sunny
5 1/6/2023 38 3 Snow
df.set_index('day')
temperature windspeed event
day
1/1/2023 32 6 Rain
1/2/2023 35 7 Sunny
1/3/2023 28 8 Snow
1/4/2023 24 2 Rain
1/5/2023 32 4 Sunny
1/6/2023 38 3 Snow
df
day temperature windspeed event
0 1/1/2023 32 6 Rain
1 1/2/2023 35 7 Sunny
2 1/3/2023 28 8 Snow
3 1/4/2023 24 2 Rain
4 1/5/2023 32 4 Sunny
5 1/6/2023 38 3 Snow
df.set_index('day', inplace = True)
df
temperature windspeed event
day
1/1/2023 32 6 Rain
1/2/2023 35 7 Sunny
1/3/2023 28 8 Snow
1/4/2023 24 2 Rain
1/5/2023 32 4 Sunny
1/6/2023 38 3 Snow
df.columns
Index(['temperature', 'windspeed', 'event'], dtype='object')
df.index
Index(['1/1/2023', '1/2/2023', '1/3/2023', '1/4/2023', '1/5/2023', '1/6/2023'], dtype='object', name='day')
df.loc['1/2/2023']
temperature 35
windspeed 7
event Sunny
Name: 1/2/2023, dtype: object
type(df.loc['1/2/2023'])
pandas.core.series.Series
df.reset_index(inplace = True)
df
day temperature windspeed event
0 1/1/2023 32 6 Rain
1 1/2/2023 35 7 Sunny
2 1/3/2023 28 8 Snow
3 1/4/2023 24 2 Rain
4 1/5/2023 32 4 Sunny
5 1/6/2023 38 3 Snow
df.set_index('event', inplace = True)
df
day temperature windspeed
event
Rain 1/1/2023 32 6
Sunny 1/2/2023 35 7
Snow 1/3/2023 28 8
Rain 1/4/2023 24 2
Sunny 1/5/2023 32 4
Snow 1/6/2023 38 3
df.loc['Snow']
day temperature windspeed
event
Snow 1/3/2023 28 8
Snow 1/6/2023 38 3
df.reset_index(inplace = True)
using excel
df = pd.read_excel('weather_data.xlsx', 'Sheet1')
df
day temperature windspeed event
0 2017-01-01 32 6 Rain
1 2017-01-02 35 7 Sunny
2 2017-01-03 28 2 Snow
using list and tuple
weather_data = [
('1/1/2023', 32, 6, 'Rain'),
('1/2/2023', 35, 6, 'Snow'),
('1/3/2023', 30, 6, 'Sunny')
]
df = pd.DataFrame(weather_data)
df
0 1 2 3
0 1/1/2023 32 6 Rain
1 1/2/2023 35 6 Snow
2 1/3/2023 30 6 Sunny
weather_data = [
('1/1/2023', 32, 6, 'Rain'),
('1/2/2023', 35, 6, 'Snow'),
('1/3/2023', 30, 6, 'Sunny')
]
df = pd.DataFrame(weather_data, columns=['day', 'temperature', 'windspeed', 'event'])
df
day temperature windspeed event
0 1/1/2023 32 6 Rain
1 1/2/2023 35 6 Snow
2 1/3/2023 30 6 Sunny
df.to_csv('indore.csv', index=True)
df.to_excel('goa.xlsx', sheet_name='weather' , index=True)
# from pandasql import sqldf
# q = "SELECT * from students"
# sqldf(q)
df = pd.read_csv('weather_data2.csv', parse_dates = ['day'])
df
day temperature windspeed event
0 2017-01-01 32.0 6.0 Rain
1 2017-01-04 NaN 9.0 Sunny
2 2017-01-05 28.0 NaN Snow
3 2017-01-06 NaN 7.0 NaN
4 2017-01-07 32.0 NaN Rain
5 2017-01-08 NaN NaN Sunny
6 2017-01-09 NaN NaN NaN
7 2017-01-10 34.0 8.0 Cloudy
8 2017-01-11 40.0 12.0 Sunny
type(df['day'][0])
pandas._libs.tslibs.timestamps.Timestamp
df.set_index('day', inplace=True)
df
temperature windspeed event
day
2017-01-01 32.0 6.0 Rain
2017-01-04 NaN 9.0 Sunny
2017-01-05 28.0 NaN Snow
2017-01-06 NaN 7.0 NaN
2017-01-07 32.0 NaN Rain
2017-01-08 NaN NaN Sunny
2017-01-09 NaN NaN NaN
2017-01-10 34.0 8.0 Cloudy
2017-01-11 40.0 12.0 Sunny
fill not available