Learn Data Analysis, Machine Learning, and Visualization

Explore opportunities, learn concepts, and practice with real-world examples.

Examples and Sample Code

1. Data Cleaning with Pandas

import pandas as pd

# Sample records; None marks a missing entry in each column
data = {
    'Name': ['Alice', 'Bob', 'Charlie', None],
    'Age': [25, None, 30, 22],
    'Salary': [50000, 60000, None, 45000],
}
df = pd.DataFrame(data)

# Impute the numeric gaps: mean for Age, median for Salary.
# 'Name' is not listed, so its missing entry is left untouched.
fill_values = {
    'Age': df['Age'].mean(),
    'Salary': df['Salary'].median(),
}
df = df.fillna(value=fill_values)
print(df)
        
2. Simple Linear Regression

from sklearn.linear_model import LinearRegression
import numpy as np

# Training set: a single feature as a column vector, and its targets
X = np.arange(1, 5).reshape(-1, 1)
y = np.array([2.2, 4.4, 6.1, 8.5])

# Build and fit in one step; fit() hands back the fitted estimator
model = LinearRegression().fit(X, y)

# Report the learned slope and offset
print("Coefficient:", model.coef_)
print("Intercept:", model.intercept_)
        
3. Matplotlib Visualization

import matplotlib.pyplot as plt

# Bar labels, bar heights, and one colour per bar
categories = ['A', 'B', 'C', 'D']
values = [3, 7, 8, 5]
bar_colors = ['blue', 'green', 'red', 'purple']

# Draw on an explicit Axes rather than pyplot's implicit current axes
fig, ax = plt.subplots()
ax.bar(categories, values, color=bar_colors)
ax.set_title('Sample Bar Chart')
plt.show()
        
4. Word Cloud Visualization

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Source text whose words populate the cloud
text = "data machine learning visualization analysis AI Python"
cloud_builder = WordCloud(background_color='white')
wordcloud = cloud_builder.generate(text)

# Show the cloud as an image with the axes hidden
fig, ax = plt.subplots()
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
plt.show()
        
5. K-Means Clustering

from sklearn.cluster import KMeans
import numpy as np

# Data: five 2-D points to be split into two clusters
data = np.array([[1, 2], [2, 3], [3, 4], [8, 9], [9, 10]])

# random_state fixes the centroid initialisation, so the centers are printed
# in the same order on every run.
# n_init is spelled out because its default differs between scikit-learn
# releases (10 before 1.4, 'auto' from 1.4 on).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(data)

print("Cluster Centers:", kmeans.cluster_centers_)
        
5. Descriptive Statistics


        import numpy as np

# Observations to summarise
data = [12, 15, 14, 10, 8, 12, 14, 18]
samples = np.asarray(data)

# Central tendency and spread (std() uses NumPy's default ddof=0)
mean = samples.mean()
median = np.median(samples)
std_dev = samples.std()

# Print each statistic next to its label
for label, stat in (
    ("Mean:", mean),
    ("Median:", median),
    ("Standard Deviation:", std_dev),
):
    print(label, stat)


    
6. Data Visualization with Seaborn

        import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Category labels and the size of each slice
data = dict(
    Category=['A', 'B', 'C', 'D'],
    Values=[45, 30, 15, 10],
)
df = pd.DataFrame(data)

# Pie chart drawn with Matplotlib only (nothing from seaborn is called here):
# percentages shown to one decimal place, first slice starting at 90 degrees
fig, ax = plt.subplots()
ax.pie(df['Values'], labels=df['Category'], autopct='%1.1f%%', startangle=90)
ax.set_title("Pie Chart Example")
plt.show()


    
7. Logistic Regression for Classification

        from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load dataset
data = load_iris()
X = data.data
y = data.target

# Hold out 20% for testing.
# random_state makes the split, and so the printed accuracy, repeatable.
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train model (max_iter raised to 200, as in the original example)
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# score() reports mean accuracy on the held-out data
print("Model Accuracy:", model.score(X_test, y_test))


    
8. DataFrame Operations in Pandas

        import pandas as pd

# Column-oriented source data for the DataFrame
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Salary=[50000, 60000, 70000],
)
df = pd.DataFrame(data)

# Append a 'Bonus' column worth 10% of each salary
df = df.assign(Bonus=df['Salary'] * 0.1)

# Show the frame including the new column
print(df)


    
9. Scatter Plot with Matplotlib

        import matplotlib.pyplot as plt

# Paired coordinates: x[i] goes with y[i]
x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11]
y = [99, 86, 87, 88, 100, 86, 103, 87, 94, 78]

# Scatter the points on an explicit Axes, then label and show it
fig, ax = plt.subplots()
ax.scatter(x, y, color='blue', label='Data Points')
ax.set(title='Scatter Plot Example', xlabel='X-Axis', ylabel='Y-Axis')
ax.legend()
plt.show()


    
10. Heatmap Visualization

        import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# 10x10 grid of random values to colour
data = np.random.rand(10, 10)

# Heatmap settings gathered in one place: no cell annotations,
# 'coolwarm' colour map, thin lines between cells
heatmap_options = dict(annot=False, cmap='coolwarm', linewidths=0.5)
sns.heatmap(data, **heatmap_options)
plt.title("Heatmap Example")
plt.show()


    
11. Decision Tree Classifier

        from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load dataset
data = load_iris()
X = data.data
y = data.target

# Hold out 20% for testing.
# random_state makes the split, and so the printed accuracy, repeatable.
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train Decision Tree; seeding it as well makes the fitted tree repeatable
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# score() reports mean accuracy on the held-out data
print("Accuracy:", clf.score(X_test, y_test))


    
12. Line Chart with Matplotlib

        import matplotlib.pyplot as plt

# Points to connect: x[i] goes with y[i]
x = [1, 2, 3, 4, 5]
y = [10, 20, 15, 25, 30]

# Dashed green line with round markers on an explicit Axes
fig, ax = plt.subplots()
ax.plot(x, y, marker='o', linestyle='--', color='green')
ax.set_title('Line Chart Example')
ax.set_xlabel('X-Axis')
ax.set_ylabel('Y-Axis')
ax.grid(True)
plt.show()


    
13. Principal Component Analysis (PCA)

        from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

# Feature matrix of the iris dataset (targets are not needed here)
data = load_iris()
X = data['data']

# Project the features onto the first two principal components
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)

# Compare the array shapes before and after the projection
for caption, matrix in (("Original shape:", X), ("Reduced shape:", X_reduced)):
    print(caption, matrix.shape)


    
14. Histogram with Matplotlib

        import matplotlib.pyplot as plt
import numpy as np

# 1000 random draws to bin
data = np.random.randn(1000)

# 30-bin histogram with outlined bars on an explicit Axes
fig, ax = plt.subplots()
ax.hist(data, bins=30, color='skyblue', edgecolor='black')
ax.set(title='Histogram Example', xlabel='Value', ylabel='Frequency')
plt.show()


    
15. CloneWebPage_2.py

        #BaseUrl Of the website
# Put the link of the website here without index.html.
# e.g. if http://xyz.com/index.html is the page you want to clone,
# set the base URL to http://xyz.com
baseurl = 'Replace this'

import os
import urllib.request
from urllib.parse import urljoin, urlsplit


def is_remote(ref):
    """Return True when *ref* names another location instead of a relative file.

    Anything with a URL scheme ("http:", "https:", "data:", ...) or a network
    location ("//cdn.example.com/x.js") counts as remote and is not mirrored.
    """
    parts = urlsplit(ref)
    return bool(parts.scheme or parts.netloc)


def local_path_for(ref):
    """Map a relative asset reference to the relative path it is saved under.

    The query string and fragment are dropped and leading slashes are removed,
    so the file always lands below the current working directory.

    Returns None when the reference contains no file path or when it would
    resolve outside the current directory (e.g. "../secret.txt"). References
    come from the downloaded page, so they are treated as untrusted.
    """
    path = os.path.normpath(urlsplit(ref).path.lstrip('/'))
    escapes = path == os.pardir or path.startswith(os.pardir + os.sep)
    if path in ('', '.') or escapes or os.path.isabs(path):
        return None
    return path


def fetch_asset(ref):
    """Download one asset referenced relative to baseurl and save it locally."""
    target = local_path_for(ref)
    if target is None:
        print("------Skipped for ----- ", ref)
        return
    print('\t[+]Getting file = ' + str(ref))
    folder = os.path.dirname(target)
    if not folder:
        # A bare file name has no directory part; os.makedirs('') would raise.
        print("\tNo directory. Saving file", ref)
    elif not os.path.isdir(folder):
        print("    [DIR]Creating directory")
        os.makedirs(folder, exist_ok=True)
    # urljoin inserts or collapses the slash between base and reference,
    # which plain string concatenation got wrong.
    source = urljoin(baseurl.rstrip('/') + '/', ref)
    urllib.request.urlretrieve(source, target)


def mirror_assets(tags, attr, missing_msg=None):
    """Download every local file referenced by attribute *attr* on *tags*.

    tags        -- iterable of parsed tags offering a dict-style .get()
    attr        -- attribute holding the reference ('src' or 'href')
    missing_msg -- when given, printed together with any tag lacking *attr*;
                   such tags are skipped either way
    """
    for tag in tags:
        ref = tag.get(attr)
        if not ref:
            if missing_msg is not None:
                print(missing_msg, tag)
            continue
        if is_remote(ref):
            print("------Skipped for ----- ", ref)
            continue
        try:
            fetch_asset(ref)
        except OSError as err:
            # urllib's URLError/HTTPError are OSError subclasses: report the
            # failed file and carry on instead of aborting the whole clone.
            print("Exception occured = ", err)


def main():
    """Clone the page at baseurl: save index.html, then its images, CSS and JS."""
    # Imported here so the helpers above stay importable without the
    # third-party bs4 package installed.
    from bs4 import BeautifulSoup

    print('''Python script to Clone a Web Page
Author : Sai Kiran Goud
Date : 14 May 2017
''')
    print("Connecting to server")
    with urllib.request.urlopen(baseurl) as response:
        html_doc = response.read()
    print("Connection Success!")
    try:
        soup = BeautifulSoup(html_doc, 'html.parser')
        with open('index.html', 'w', encoding='utf-8') as index_file:
            index_file.write(str(soup))
        print("Initializing Index File")
        # Get all images
        print("Process Initiated")
        print("Step 1: Getting all images.")
        mirror_assets(soup.find_all('img'), 'src')
        print('==============Done getting images!==============')
        # Get all CSS
        print("Step 2: Getting all CSS.")
        mirror_assets(soup.find_all('link'), 'href')
        print('==============Done getting CS files!==============')
        # Get all JS; inline <script> blocks have no src and are reported
        print("Step 3: Getting all JS.")
        mirror_assets(soup.find_all('script'), 'src',
                      missing_msg="Excpetion occured in JS for")
        print('==============Done getting JS Files!==============')
        print('Script Executed sucessfully!')
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print("Exception occured = ", e)


if __name__ == "__main__":
    main()

    
Back to Home