Hello guys, welcome back to my blog. In this article, I will discuss Python programming for data science and machine learning, and I will share code ranging from basic concepts to advanced ones.
#Command to print hello world
print("!!!Hello World")
!!!Hello World
#Command to print world in next line
print("Hello\nWorld")
Hello
World
#Code to store string value and display it
strworld = "Dear Chetan"
print(strworld)
Dear Chetan
#Command to know type
type(strworld)
str
#BEDMAS
print(2 - 4 * 2 + 9 / 10)
-5.1
#Arithmetic operations
a = 9
b = 5
print(a + b)
print(a - b)
print(a / b)
print(a // b) #Quotient
print(a ** b) #Exponential
print(a % b) #Gives remainder
14
4
1.8
1
59049
4
#Increment and decrement of number
inc = 8
inc += 1
print(inc)
dec = 8
dec -= 1
print(dec)
9
7
#float type checking
num = 30.3
print(num)
type(num)
30.3
float
#Complex number
com = 3 + 8j
print(com)
type(com)
(3+8j)
complex
#Logical operation
print(13<25)
print(True and False)
#It will print true if both are True
print(True or False)
#It will print true if any one is True
True
False
True
#Text conversion from upper case to lower case or vice versa
sentence = "i am lower case now"
print(sentence.upper())
sentence = "I AM UPPER CASE NOW"
print(sentence.lower())
I AM LOWER CASE NOW
i am upper case now
#right strip sentence
strip_sentence = "There is a space at the end "
print(strip_sentence)
print(strip_sentence.rstrip())
#rstrip will delete the space
There is a space at the end
There is a space at the end
#left strip sentence
strip_sentence = " There is a space at the Beginning"
print(strip_sentence)
print(strip_sentence.lstrip())
There is a space at the Beginning
There is a space at the Beginning
#Strip all or delete all space
chetan = " Shidling "
print(chetan)
print(chetan.strip())
Shidling
Shidling
#Strip which is mentioned
percent = "555%"
print(percent.rstrip("%"))
555
#mixed strip
mixedstr = "*****SSSSSS######"
print(mixedstr.rstrip("#").lstrip("*"))
#rstrip means strip right side
#lstrip means strip left side
SSSSSS
#Sentence operation
mixedstr = "20 people affected by Carona"
no_of_people = mixedstr[0:2]
print("Number of people are :", no_of_people)
Number of people are : 20
#rest of string
Rest_of_string = mixedstr[2:]
print(Rest_of_string)
people affected by Carona
groupstr = "10 Apple 5 Banansa 3 Orage"
fruit = groupstr[19 :27]
print(fruit)
3 Orage
print(groupstr[:-2])
10 Apple 5 Banansa 3 Ora
#Find even or odd
#even
nums_seq = "123456789"
even_nums = nums_seq[1::2]
print(even_nums)
#Print first name and last name
first_name = "Chetan"
last_name = "Shidling"
name = first_name + " " + last_name
print(name)
Chetan Shidling
#My age
my_age = input("Enter my age :")
my_age_sen = ("I am " + my_age +" years old")
print(my_age_sen)
Enter my age :21
I am 21 years old
#Sentence formation
A = "Data"
B = "Science"
C = "Python"
print("{0} {1} using {2}".format(A, B, C))
Data Science using Python
#Arrays
#How to create list
create_list = []
print(create_list)
type(create_list)
[]
list
#Another way using constructor list()
list_1 = list()
type(list_1)
list
Methods performed on lists
#append() - adds an element at the end of the list
#insert() - adds an element at the specified position
#extend() - adds the elements of a list ( or any iterable), to the end of the current list
#copy() - returns a copy of the list
#count() - returns the number of elements with the specified value
#clear() - removes all the elements from the list
#index() - returns the index of the first element with the specified value
#remove() - removes the item with the specified value
#pop() - removes and returns the element at the specified position (the last element by default)
#reverse() - reverses the order of the list
#sort() - sorts the list
#Data Structures
fruits = ['Orange', 'Apple', 'Pear', 'Banana', 'Kiwi', 'Apple', 'Banana']
#How to find count of elements inside a list
fruits.count('Apple')
2
#How to find index
fruits.index('Banana')
fruits.index('Banana', 4)#It will find the index of 'Banana', searching from position 4 onwards
#Sentence into words in sentence
a_sentence = "Corona virus is spreading all over the world"
words_in_sentence = a_sentence.split()
print(words_in_sentence)
#Multiple sentences
multi_sentence = "I am Chetan. I have a pet dog. It's color is white and black, but it looks big."
sentence_inText = multi_sentence.split('.')
print(sentence_inText)
['I am Chetan', ' I have a pet dog', " It's color is white and black, but it looks big", '']
#Insert $ between all characters
join_sentence = '$'.join(multi_sentence)
print(join_sentence)
#Remove every occurrence of an element from a list using a for loop
list1 = ['Apple','Banana','Orange','Orange','Banan','Banana','Apple']
#remove() deletes only the first match, so it is called once per occurrence
count1 = list1.count('Banana')
for _ in range(count1):
    list1.remove('Banana')
#Print once, after the loop has finished
print(list1)
['Apple', 'Orange', 'Orange', 'Banan', 'Apple']
#Find length of number list
num_list = [2, 3, 5, 1, 9, 3, 1]
print(len(num_list))
7
#How to sort numbers
print(sorted(num_list))
[1, 1, 2, 3, 3, 5, 9]
#How to find minimum value
print(min(num_list))
1
#How to find maximum value
print(max(num_list))
9
#How to create a nested list
nested_list = [[2, 5, 1],[5, 9, 3],[9, 2, 0]]
print(nested_list[1])
[5, 9, 3]
#How to print any one element in a nested list
print(nested_list[0][2])
1
#How to print a few of the inner lists
print(nested_list[0:2])
[[2, 5, 1], [5, 9, 3]]
#Delete particular element in the list
num_list = [-1, 3, 4, 1, 5, 6, 9, 22]
del num_list[2]
print(num_list)
[-1, 3, 1, 5, 6, 9, 22]
#Tuple - They are immutable and usually contain a heterogeneous sequence of elements
sample_tuple = 23433, 84392, "Friends"
type(sample_tuple)
#Empty tuples are constructed by an empty pair of parentheses
#A tuple with one item is constructed by following a value with a comma
#It is not sufficient to enclose a single value in parentheses. Ugly, but effective
empty_tuple = ()
single_tuple = 'Dear',
print(single_tuple)
type(single_tuple)
('Dear',)
tuple
#Now let's see difference between list and tuple
list_1 = [12, 45, 55]
print(list_1)
print("\n")
tuple_1 = tuple(list_1)
print(tuple_1)
type(tuple_1)
#Dictionaries - A set of key:value pairs - with the requirement that the keys are unique (within one dictionary)
#Use {} to create dictionary
#Use 'dict()' to create dictionary
#Dictionaries are indexed by keys, which can be of any immutable type (for example strings and numbers)
def square(num):
    """Return num multiplied by itself (num raised to the power 2)."""
    return num ** 2
sq_r = square(3)
print(sq_r)
9
#Find factorial
def factorial(n):
    """Return n! (the product 1 * 2 * ... * n), computed recursively.

    By definition 0! is 1, so both 0 and 1 return 1.

    Raises:
        ValueError: if n is negative, because the factorial is not defined there.
    """
    if n < 0:
        raise ValueError("factorial() is not defined for negative values")
    #Base case: stops the recursion and makes factorial(0) return 1, not 0
    if n <= 1:
        return 1
    return n * factorial(n - 1)
fact = factorial(6)
print(fact)
720
#Addition of argument
def addition(*args):
    """Print the tuple of received positional arguments and return their sum.

    *args collects any number of positional arguments into a tuple; with no
    arguments the sum is 0.
    """
    print(args)
    return sum(args)
print(addition(4,6,2,7,3,11))
print(addition(1,2))
(4, 6, 2, 7, 3, 11)
33
(1, 2)
3
#Conditional statements
#int() raises ValueError if the typed text is not a whole number
score = int(input("Enter a number :"))
#Only the first branch whose condition is true runs
if score < 0:
    print("Enter positive number")
elif score == 0:
    print("You scored Zero")
elif score == 1:
    print("You scored one")
else:
    print("It is above one")
Enter a number :2
It is above one
#Multiple if statement
score = int(input("Please enter your score :"))
cutoff = 75
pass_marks = 35
#Separate if statements are each tested on their own, so a score at or above
#the cutoff prints both "You got distinction" and "You passed"
if score >= cutoff:
    print("You got distinction")
if score >= pass_marks:
    print("You passed")
if score < pass_marks:
    print("You failed")
Please enter your score :45
You passed
#If else
score = int(input("Enter you score"))
#Exactly one of the two branches runs
if score >= 35:
    print("You passed")
else:
    print("You failed")
Enter you score12
You failed
#With strings
#The "in" operator tests whether one string occurs inside another
str1 = "Chetan works at CS Electrical And Electronics"
if "Electrical" in str1:
    print("It is there")
else:
    print("Not found")
print("\n")
str2 = "World is very big and lovely"
#"very" is present but "verymuch" is not, so this check takes the else branch
if "verymuch" in str2:
    print("It is found in str2")
else:
    print("Not found")
It is there
Not found
#A condition that is False always takes the else branch
if False:
    print("True for this statement")
else:
    print("False for this statement")
False for this statement
#A condition that is True always takes the if branch
if True:
    print("True for this statement")
else:
    print("False for this statement")
True for this statement
#Loops
fruits = ['Apple','Orange','Banana']
#Print each fruit together with the number of characters in its name
for fruit in fruits:
    print(fruit, len(fruit))
Apple 5
Orange 6
Banana 6
#Using range function, loop through...
#range(6, 15) yields 6 up to 14; the stop value 15 is excluded
for w in range(6, 15):
    print(w)
6
7
8
9
10
11
12
13
14
#Range function with increment
#The third argument is the step: 10, 12, ..., 18 (20 is excluded)
for i in range(10, 20, 2):
    print(i)
10
12
14
16
18
#Looping on a string
string = "Jai Shri Ram"
#Iterating a string yields one character at a time, spaces included
for alphabet in string:
    print(alphabet)
J
a
i
S
h
r
i
R
a
m
#Triangle
str1 = ' '
for i in range(0, 9):
    if i < 5:
        #First five rows: grow the row by one star
        str1 += '* '
    else:
        #Remaining rows: drop the last star and its trailing space
        str1 = str1[:-2]
    print(str1)
*
* *
* * *
* * * *
* * * * *
* * * *
* * *
* *
*
#Print every character of the statement together with its index
my_string = "Count the statement alpabets"
#enumerate() yields (index, character) pairs, starting at index 0
for n, alphabet in enumerate(my_string):
    print(alphabet, n)
C 0
o 1
u 2
n 3
t 4
5
t 6
h 7
e 8
9
s 10
t 11
a 12
t 13
e 14
m 15
e 16
n 17
t 18
19
a 20
l 21
p 22
a 23
b 24
e 25
t 26
s 27
#Find the vowels
vowels = ' '
for alphabet in my_string:
    #Only lower-case vowels are matched
    if alphabet in 'aeiou':
        vowels += ' ' + alphabet
#Print once, after all characters have been checked
print(vowels)
o u e a e e a a e
#Find the even and odd numbers
num = '0123456789'
even = ' '
odd = ' '
for number in num:
    #Each character is converted to int so the remainder test can be applied
    if int(number) % 2 == 0:
        even += number
    else:
        odd += number
print('All evens are : ' + even + ' & All odds are : '+odd)
All evens are : 02468 & All odds are : 13579
#Sentence check: print the words that start with '#', without the '#'
str1 = "#Chetan #Shidling is a #Entrepreneur"
for word in str1.split():
    if word.startswith('#'):
        #word[1:] skips the leading '#'
        print(word[1:])
Chetan
Shidling
Entrepreneur
#Dictionary
students = {1:['Chetan',21], 2:['Nitin',19], 3:['Krish', 22]}
type(students)
dict
#items() yields (key, value) pairs
for key, val in students.items():
    print(key, val)
1 ['Chetan', 21]
2 ['Nitin', 19]
3 ['Krish', 22]
#keys() yields only the keys
for key in students.keys():
    print(key)
1
2
3
#values() yields only the values
for val in students.values():
    print(val)
['Chetan', 21]
['Nitin', 19]
['Krish', 22]
#While loop for fibonacci series
a, b = 0, 1
n = 9
#Print the Fibonacci numbers that are smaller than n on one line
while a < n:
    print(a, end=' ')
    #Tuple assignment: the right-hand side is evaluated before either name changes
    a, b = b, a+b
0 1 1 2 3 5 8
#NumPy - NUMerical PYthon
#It is used for scientific computing and data analysis
#Power of NumPy is N-dimensional array object which is in the form of rows and columns.
#Fast
#Less memory
#Convenient
#Vectorized code
#How to import the numpy library
import numpy as np
#np is an alias, you may use any other alias, though np is standard in the industry
#2-D array creation using two lists
fromList_to_2dArray = np.array([[0,1,2,3,4],[5,6,7,8,9]])
print(fromList_to_2dArray)
[[0 1 2 3 4]
[5 6 7 8 9]]
#Let us see how NumPy works when compared to standard Python
#Sum of two list elements
num_list1 = [1,2,3,4,5,6,7,8,9]
num_list2 = [10,11,12,13,14,15,16,17,18]
sum_list = list(map(lambda x, y: x+y, num_list1, num_list2))
print(sum_list)
###How to initialize np arrays when size is known
#Functions that are used to do so are:
#np.arange(): Array creation with defined increments
#np.zeros(): Array of 0s
#np.ones(): Array of 1s
#np.random.random(): Array of random numbers
#np.random.randint(): Random array of integers within a particular range
#np.linspace(): Array of fixed length
#np.full(): Constant array of any number 'n'
#np.eye(): Identity matrix array
#np.tile(): Array built by repeating a given array a specified number of times
#Find dtype, shape, itemsize, and ndim of array1
#NOTE(review): the article never shows how array1 was created, so the prints
#failed with NameError. A 5x5 array of ones matches the printed output
#(float64, (5, 5), 8, 2); confirm against the original notebook.
array1 = np.ones((5, 5))
print(array1.dtype)
print(array1.shape)
print(array1.itemsize)
print(array1.ndim)
float64
(5, 5)
8
2
#Creating a 3-d array - Difficult to print it
#reshape() simply reshapes a 1-D array
array_3d = np.arange(12).reshape(2,3,2)
print(array_3d)
[[[ 0 1]
[ 2 3]
[ 4 5]]
[[ 6 7]
[ 8 9]
[10 11]]]
#Indexing, Subsetting, Slicing and iterating through Arrays
#Indexing and slicing one dimensional arrays
array1d = np.arange(5)
print(array1d)
[0 1 2 3 4]
#Access through indexing
array1d[2]
array1d[2:3]
array([2])
#Read several elements at once by passing a list of indices
array1d[[2,3,4]]
array([2, 3, 4])
#Subset with an increment of 2
print(array1d[0::2])
[0 2 4]
#Iterations are also similar to lists
#Each step yields one element of the 1-D array
for i in array1d:
    print(i*2)
0
2
4
6
8
#2 D array accessing
array2d = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(array2d)
print(array2d.ndim)
print(array2d[1,3])
[[ 1 2 3 4]
[ 5 6 7 8]
[ 9 10 11 12]]
2
8
print(array2d[2,:])
#Slicing all columns
print(type(array2d[2,:]))
#Slicing all rows
print(array2d[:,1])
#Slicing all rows and the first 2 columns
print(array2d[:,:2])
#Iterating over 2D/3D arrays: a plain for loop walks the first axis,
#so for array2d each step yields one row (iterate over array2d.T for columns)
for row in array2d:
    print(row)
[1 2 3 4]
[5 6 7 8]
[ 9 10 11 12]
#Execution speed in NumPy and Standard Python Lists
NumPy is much faster!!!
#Import time library
import time
#create two large lists
list1 = list(range(200000))
list2 = [j**2 for j in range(200000)]
#Capture start time & end time
#perf_counter() is a high-resolution clock meant for measuring short intervals;
#time.time() can be too coarse here and report a zero interval
start_time = time.perf_counter()
perform_Ops = list(map(lambda x,y: x*y, list1, list2))
Ops_time = time.perf_counter()
diff_time = Ops_time - start_time
print(diff_time)
#numpy array
#Built from the same lists so both versions multiply identical data
array1 = np.array(list1)
array2 = np.array(list2)
np_startTime = time.perf_counter()
#Element-wise multiplication happens inside NumPy, with no Python-level loop
array3 = array1*array2
np_endTime = time.perf_counter()
numpy_time = np_endTime - np_startTime
print(numpy_time)
print("The ratio of the time taken {}".format(numpy_time/diff_time))
0.03191232681274414
0.000997304916381836
The ratio of the time taken 0.03125140082181547
#A file is a named location on a disk used to store related information.
#It is used to store data permanently in non-volatile memory
#Variables are lost when the program ends
#Mantra
#Open -> R or W -> Close
#Mode
#r-Read, w-Write, a-append, r+ - read and write
#file open
#open(path,mode)
#Mode "w" creates the file, or empties it if it already exists
fo = open('G:\\Chetan\\work.txt',"w")
#close()
#Keep the returned file object so that it can actually be closed
fo.close()
['Hi i am chetan\n', 'Hi i am chetan\n', 'Hi i am chetan\n', 'Hi i am chetan\n', 'Hi i am chetan\n', 'Hi i am chetan\n']
#One by one
#The with statement closes the file even if reading raises an error
with open('G:\\Chetan\\work.txt',"r") as fo:
    #readlines() returns a list with one string per line, each keeping its "\n"
    linedata = fo.readlines()
for line in linedata:
    print(line)
Hi i am chetan
Hi i am chetan
Hi i am chetan
Hi i am chetan
Hi i am chetan
Hi i am chetan
#Writing to file
#Mode "w" empties the file first, so its previous contents are lost
with open('G:\\Chetan\\work.txt',"w") as fo:
    fo.write("I am in Banulure\n")
    fo.write("I came here to start my own company\n")
#Open the file and you will find that the previous data has vanished
#append
#Mode "a" keeps the existing contents and adds the new text at the end
with open('G:\\Chetan\\work.txt',"a") as fo:
    fo.write("\nHa ha, Deleted na?\n")
#Ask user
#Read the message first so the file is not held open while waiting for input
message = input("\nEnter your message :")
with open('G:\\Chetan\\work.txt',"a") as fo:
    fo.write(message)
Enter your message :It nice
NumPy Arrays - Few Operations
Basic mathematical operations / linear algebra operations / functions
Playing with arrays using resize / reshape / stack creation
#Playing with arrays
#import the numpy library
import numpy as np
#If the arrays don't have the same dimensions
#use resize()
sample_array = np.arange(0,11)
print(sample_array)
print(sample_array.size)
#np.resize() returns a new array and repeats the data when the new shape is larger
print(np.resize(sample_array, (3,4)))
print(np.resize(sample_array, (6,4)))
#ndarray.resize() changes the array in place, pads with zeros and returns None,
#so it is called first and the array is printed afterwards.
#refcheck=False skips the reference-count check, which can fail in notebooks
#even though no other array shares this memory.
sample_array.resize((3,5), refcheck=False)
print(sample_array)
Linear Algebra Operations
help(np.linalg) - Provides linear algebra documentation
np.linalg - is a package
np.linalg.inv - inverse of a matrix
np.linalg.det - Determinant of a matrix
np.linalg.eig - Eigenvalues and eigenvectors of a matrix
np.dot(a,b) - dot product of two matrices
#Determinant of 3*3 matrix vs 3*4 matrix
import numpy as np
#NOTE(review): the article never shows how array1 and array2 were built for
#this example. The matrices below are illustrative stand-ins with the shapes
#named in the heading; replace them with the intended data.
array1 = np.array([[2, 0, 1], [1, 3, 2], [1, 1, 4]])
array2 = np.arange(12).reshape(3, 4)
print(np.linalg.det(array1))
#np.linalg.det(array2)
#array2 is a 3*4 matrix; the determinant needs a square matrix, so this raises an error
Basics of Pandas library
Used for data analysis - Data manipulation, building ML models and data visualisation
Pandas data structures - Series & Dataframes
Pandas Series
01. import pandas library
02. To create a series object, use pd.Series()
03. Each column in a dataframe is a pandas series
#import pandas
import pandas as pd
#Creating a character series from tuple
char_series = pd.Series(('H','A','R','I'))
char_series
0 H
1 A
2 R
3 I
dtype: object
#Creating series from list
num_series = pd.Series([3,6,9,12,15,18,21])
print(num_series)
print(type(num_series))
#Setting up indexes explicitly
#Use index argument while generating series
num_series = pd.Series([3,6,9,12,15], index = ['a','b','c','d','e'])
num_series
a 3
b 6
c 9
d 12
e 15
dtype: int64
#Another way of defining index
import numpy as np
series1 = pd.Series(np.array(range(0,5))+2, index = range(0,5))
series1
0 2
1 3
2 4
3 5
4 6
dtype: int32
The Pandas Dataframe
Dataframe - It is a table with rows and columns
Rows having an index and columns having meaningful names
Creating dataframes
Ways to create dataframes are:
from dictionaries
JSON objects
Reading from CSV files, flat files or txt files, etc.