import pandas as pd
s = pd.Series(["Solar","Wind", "Thermal", "Hydroelectric" ], index=['1', '2', '3', '4'])
data = {'Powerplants': ['Bandırma1', 'Bandırma2', 'Kentsa'],
'Capacity': ['936', '607', '40'],
'Year': ["2010", "2016","1997"]}
df = pd.DataFrame(data,columns=['Powerplants', 'Capacity', 'Year'])
The first column 0,1,2 is the index and Powerplants,Capacity,Year are the Columns.
help(pd.Series.loc)
pd.read_csv('file.csv', header=None, nrows=5)
df.to_csv('myDataFrame.csv')
xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')
pd.read_excel('file.xlsx')
df.to_excel('dir/myDataFrame.xlsx', sheet_name='Sheet1')
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
pd.read_sql('SELECT * FROM table;', engine)
pd.read_sql_table('my_table', engine)
pd.read_sql_query('SELECT * FROM my_table;', engine)
df.to_sql('myDf', engine)
Our series:
s['2']
#Out[]: ' Wind'
Get subset of a DataFrame
Our dataframe:
df[1:]
#Out[]:
''' Powerplants Capacity Year
1 Bandırma2 607 2016
2 Kentsa 40 1997'''
Our dataframe:
df.iloc[1, 1]
#Out[]: 607
df.iat[0, 0]
#Out[]: Bandırma1
df.iat[1, 1]
#Out[]: 607
Our dataframe:
df.loc[0, 'Year']
#Out[]: 2010
df.at[0, 'Year']
#Out[]: 2010
s = pd.Series([1, -2, -6, 4], index=['a', 'b', 'c', 'd'])
s[~(s > 2)]
'''Out[]:
a 1
b -2
c -6'''
s where value is <-2 or >2
s[(s < -2) | (s > 2)]
'''Out:
c -6
d 4'''
s['c']
#Out:6
s.drop(['a', 'c'])
'''Out[]
a 1
d 4'''
Drop values from columns(axis=1)
df.drop('Year', axis=1)
'''Out[]:
Powerplants Capacity
0 Bandırma1 936
1 Bandırma2 607
2 Kentsa 40'''
df.sort_index()
'''Out[]:
Powerplants Capacity Year
0 Bandırma1 936 2010
1 Bandırma2 607 2016
2 Kentsa 40 1997'''
Sort by the values along an axis
df.sort_values(by='Powerplants')
Powerplants Capacity Year
0 Bandırma1 936 2010
1 Bandırma2 607 2016
2 Kentsa 40 1997
df.sort_values(by='Capacity')
'''Out[]:
Powerplants Capacity Year
2 Kentsa 40 1997
1 Bandırma2 607 2016
0 Bandırma1 936 2010'''
df.rank()
'''Out[]:
Powerplants Capacity Year
0 1.0 3.0 2.0
1 2.0 2.0 3.0
2 3.0 1.0 1.0'''
df.shape
#Out[28]: (3, 3)
df.columns
#Out[]:Index(['Powerplants', 'Capacity', 'Year'], dtype='object')
Info on DataFrame
df.info()
'''Out[]: Index(['Powerplants', 'Capacity', 'Year'], dtype='object')
Capacity Year
0 Bandırma1 936 2010
1 Bandırma2 607 2016
2 Kentsa 40 19'''
Number of non-NA values
df.count()
'''Out[]:
Powerplants 3
Capacity 3
Year 3'''
Summary Sum of values
df.sum()
'''Out[]:
Powerplants Bandırma1Bandırma2Kentsa
Capacity 93660740
Year 201020161997'''
Cumulative sum of values
df.cumsum()
Minimum/maximum values
df.min()
'''Out[]:
Powerplants Bandırma1
Capacity 40
Year 1997'''
df.max()
'''Out[]:
Powerplants Kentsa
Capacity 936
Year 2016'''