First Commit

agusrajuthaliyan · agusrajuthaliyan · commit 70361e4cfee5 · 2024-09-15T14:25:56.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,22 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Virtual Environment
+venv/
+env/
+
+# Streamlit
+.streamlit/
+
+# Model files
+models/*.pkl
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# IDE files
+.vscode/
+.idea/
diff --git a/README.md b/README.md
@@ -0,0 +1,32 @@
+# Stock Market Prediction App
+
+This Streamlit app uses a Random Forest model to predict stock prices based on historical data.
+
+## Features
+
+- Fetch and display historical stock data
+- Train a Random Forest model for price prediction
+- Visualize stock price trends and model performance
+- Make predictions based on user input
+
+## Installation
+
+1. Clone this repository:
+   ```
+   git clone https://github.com/agusrajuthaliyan/stock-market-prediction.git
+   cd stock-market-prediction
+   ```
+
+2. Install the required packages:
+   ```
+   pip install -r requirements.txt
+   ```
+
+## Usage
+
+Run the Streamlit app:
+```
+streamlit run app/main.py
+```
+
+Navigate to the provided local URL in your web browser to use the app.
diff --git a/app/__innit__.py b/app/__innit__.py
diff --git a/app/data_loader.py b/app/data_loader.py
@@ -0,0 +1,20 @@
+import yfinance as yf
+import pandas as pd
+import streamlit as st
+
+@st.cache_data(ttl=86400)  # Cache data for 24 hours
+def load_data(ticker, start_date, end_date):
+    try:
+        stock_data = yf.download(ticker, start=start_date, end=end_date)
+        stock_data.reset_index(inplace=True)
+        return stock_data
+    except Exception as e:
+        st.error(f"Error fetching data: {str(e)}")
+        return pd.DataFrame()
+
+def validate_date(date_str):
+    try:
+        return pd.to_datetime(date_str)
+    except ValueError:
+        st.error("Invalid date format. Please use YYYY-MM-DD.")
+        return None
diff --git a/app/main.py b/app/main.py
@@ -0,0 +1,88 @@
+import streamlit as st
+import pandas as pd
+from data_loader import load_data, validate_date
+from model import load_model, predict_price, train_and_save_model, evaluate_model, train_test_split
+from utils import plot_stock_price, plot_model_performance, validate_ticker, validate_numeric_input
+
+# Set page config
+st.set_page_config(
+    page_title="Stock Market Prediction",
+    page_icon=":chart_with_upwards_trend:",
+    layout="wide"
+)
+
+# Title of the app
+st.title("Stock Market Prediction App📊")
+st.subheader("Using Random Forest🌳")
+
+# Sidebar: Stock selection and date range
+with st.sidebar:
+    st.header("Stock Selection")
+    stock_ticker = validate_ticker(st.text_input("Enter Stock Ticker Symbol", value='NVDA'))
+    start_date = st.date_input("Start Date", value=pd.to_datetime("2022-01-01"))
+    end_date = st.date_input("End Date", value=pd.to_datetime("2024-09-01"))
+
+# Load and display data
+with st.spinner("Fetching stock data..."):
+    hist = load_data(stock_ticker, start_date, end_date)
+
+if not hist.empty:
+    st.success("Data successfully loaded!")
+    st.write(f"Displaying data for: **{stock_ticker}**")
+
+    # Display stock price chart
+    fig = plot_stock_price(hist, stock_ticker)
+    st.plotly_chart(fig)
+
+    # Display historical data
+    st.write("**Filtered Historical Data** (sorted by Date)")
+    st.dataframe(hist.sort_values(by='Date'))
+
+    # Model training and evaluation
+    X = hist.drop(columns=['Date', 'Close', 'Adj Close'])
+    y = hist['Close']
+
+    regressor = load_model(stock_ticker)
+    if regressor is None:
+        regressor, X_test, y_test = train_and_save_model(X, y, stock_ticker)
+    else:
+        # If the model is loaded, we need to create X_test and y_test for evaluation
+        _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Evaluate the model
+    mse, rmse, mae, y_pred = evaluate_model(regressor, X_test, y_test)
+
+    # Display model performance metrics
+    st.subheader("Model Performance Metrics")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Mean Squared Error (MSE)", f"{mse:.4f}")
+    with col2:
+        st.metric("Root Mean Squared Error (RMSE)", f"{rmse:.4f}")
+    with col3:
+        st.metric("Mean Absolute Error (MAE)", f"{mae:.4f}")
+
+    # Plot model performance
+    plot_model_performance(y_test, y_pred)
+
+    # Prediction inputs
+    with st.sidebar:
+        st.header("Prediction Inputs")
+        open_price = validate_numeric_input(st.number_input("Open Price", min_value=0.0, step=0.1), "Open Price")
+        high_price = validate_numeric_input(st.number_input("High Price", min_value=0.0, step=0.1), "High Price")
+        low_price = validate_numeric_input(st.number_input("Low Price", min_value=0.0, step=0.1), "Low Price")
+        volume = validate_numeric_input(st.number_input("Volume", min_value=0, step=1), "Volume")
+
+    # Predict button
+    if st.sidebar.button("Predict Closing Price"):
+        if all([open_price, high_price, low_price, volume]):
+            prediction = predict_price(regressor, open_price, high_price, low_price, volume)
+            st.subheader(f"Predicted Closing Price for {stock_ticker}: {prediction:.2f}")
+
+            # Model performance
+            y_pred = regressor.predict(X)
+            plot_model_performance(y, y_pred)
+        else:
+            st.error("Please enter valid values for all inputs.")
+else:
+    st.error(f"Unable to load data for {stock_ticker}. Please check the ticker symbol.")
diff --git a/app/model.py b/app/model.py
@@ -0,0 +1,50 @@
+import os
+import joblib
+import streamlit as st
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+import pandas as pd
+import numpy as np
+
+def save_model(stock_ticker, model):
+    if not os.path.exists('models'):
+        os.makedirs('models')
+    filename = f"models/{stock_ticker}_model.pkl"
+    joblib.dump(model, filename)
+
+def load_model(stock_ticker):
+    filename = f"models/{stock_ticker}_model.pkl"
+    if os.path.exists(filename):
+        return joblib.load(filename)
+    else:
+        return None
+
+def train_and_save_model(X, y, stock_ticker):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    
+    with st.spinner("Training new model..."):
+        regressor = RandomForestRegressor(n_estimators=100, random_state=42)
+        regressor.fit(X_train, y_train)
+    
+    save_model(stock_ticker, regressor)
+    st.success(f"Model training completed and saved for {stock_ticker}!")
+    
+    return regressor, X_test, y_test
+
+def predict_price(model, open_price, high_price, low_price, volume):
+    new_data = pd.DataFrame({
+        'Open': [open_price],
+        'High': [high_price],
+        'Low': [low_price],
+        'Volume': [volume]
+    })
+    prediction = model.predict(new_data)
+    return prediction[0]
+
+def evaluate_model(model, X_test, y_test):
+    y_pred = model.predict(X_test)
+    mse = mean_squared_error(y_test, y_pred)
+    rmse = np.sqrt(mse)
+    mae = mean_absolute_error(y_test, y_pred)
+    return mse, rmse, mae, y_pred
diff --git a/app/utils.py b/app/utils.py
@@ -0,0 +1,35 @@
+import streamlit as st
+import plotly.express as px
+import plotly.graph_objects as go
+
+def plot_stock_price(data, stock_ticker):
+    fig = px.line(
+        data,
+        x='Date',
+        y='Close',
+        title=f"{stock_ticker} Closing Price Over Time",
+        range_x=[data['Date'].min(), data['Date'].max()],
+    )
+    return fig
+
+def plot_model_performance(y_true, y_pred):
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=y_true, y=y_pred, mode='markers', name='Predictions'))
+    fig.add_trace(go.Scatter(x=[y_true.min(), y_true.max()], y=[y_true.min(), y_true.max()], 
+                             mode='lines', name='Ideal Prediction', line=dict(color='red', dash='dash')))
+    fig.update_layout(title='Actual vs Predicted Closing Prices',
+                      xaxis_title='Actual Price',
+                      yaxis_title='Predicted Price')
+    st.plotly_chart(fig)
+
+def validate_ticker(ticker):
+    if not ticker or not ticker.isalpha():
+        st.error("Invalid ticker. Please enter a valid stock symbol.")
+        return ""
+    return ticker.upper()
+
+def validate_numeric_input(value, name):
+    if value <= 0:
+        st.error(f"Invalid {name}. Please enter a positive number.")
+        return None
+    return value
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+streamlit==1.24.0
+pandas==1.5.3
+yfinance==0.2.18
+scikit-learn==1.2.2
+plotly==5.14.1
+joblib==1.2.0