| | import streamlit as st |
| | import requests |
| | from src.github_analysis import analyze_github_repo |
| | from src.url_fetcher import fetch_url_title |
| | from src.fine_tune_helpers import fine_tune_model |
| |
|
| | |
# Page header: the app title followed by a short Markdown introduction.
# The introduction is bound to a name first so the rendering calls stay short.
_intro_markdown = """
This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**, **Code Summarization**, **Bug Fixing**, and more.
"""
st.title("OSINT Tool 🏢")
st.markdown(_intro_markdown)
| |
|
| | |
# Sidebar navigation. The label chosen here is stored in `app_mode` and is
# compared against these same strings further down to pick the page section.
st.sidebar.title("Navigation")
_mode_labels = [
    "GitHub Repository Analysis",
    "URL Title Fetcher",
    "Dataset Upload & Fine-Tuning",
]
app_mode = st.sidebar.radio("Choose the mode", _mode_labels)
| |
|
| | |
# Model identifiers offered as the options of the "Choose Model for
# Fine-Tuning" select box; the chosen entry is passed on to `fine_tune_model`.
# NOTE(review): these look like Hugging Face Hub repository ids — confirm.
available_models = [
    "semeru/code-text-galeras-code-summarization-3k-deduped",
    "semeru/code-code-InjectMutants",
    "semeru/code-code-BugFixingSmall",
    "semeru/code-code-GeneratingAssertsRaw",
    "deepseek-ai/DeepSeek-Prover-V1",
]
| |
|
| | |
# Render the section that matches the mode picked in the sidebar.
#
# NOTE(review): `analyze_github_repo` and `fetch_url_title` are called here
# before the same-named functions at the bottom of this file have been
# defined, so these calls presumably resolve to the versions imported from
# `src.github_analysis` / `src.url_fetcher` — confirm which one is intended.

if app_mode == "GitHub Repository Analysis":
    # --- GitHub repository lookup -------------------------------------
    st.header("GitHub Repository Analysis")
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface")
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers")

    if st.button("Analyze Repository"):
        if not (repo_owner and repo_name):
            # At least one of the two fields was left empty.
            st.warning("Please enter both repository owner and name.")
        else:
            repo_data = analyze_github_repo(repo_owner, repo_name)
            if not repo_data:
                st.error("Failed to retrieve repository details.")
            else:
                # One bolded "key: value" line per entry of the result.
                st.subheader("Repository Details")
                for key, value in repo_data.items():
                    st.write(f"**{key}**: {value}")

elif app_mode == "URL Title Fetcher":
    # --- Page title lookup --------------------------------------------
    st.header("URL Title Fetcher")
    url = st.text_input("Enter URL", "https://www.huggingface.co")

    if st.button("Fetch Title"):
        if not url:
            st.warning("Please enter a valid URL.")
        else:
            title = fetch_url_title(url)
            if not title:
                st.error("Failed to retrieve the page title.")
            else:
                st.write(f"**Page Title**: {title}")

elif app_mode == "Dataset Upload & Fine-Tuning":
    # --- Dataset upload and fine-tuning -------------------------------
    st.header("Dataset Upload & Fine-Tuning")

    # The model is chosen first, then the CSV that will be handed to it.
    model_choice = st.selectbox("Choose Model for Fine-Tuning", available_models)
    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")

    # Fine-tuning is started as soon as a file is present; there is no
    # separate confirmation step.
    # NOTE(review): Streamlit re-executes the script on each interaction, so
    # this call presumably repeats while the file stays uploaded — confirm
    # that this is intended.
    if uploaded_file is not None:
        st.write(f"Preparing fine-tuning for model: **{model_choice}**")
        st.write("File successfully uploaded! Now starting fine-tuning process...")
        fine_tune_model(uploaded_file, model_choice)
| |
|
| | |
def analyze_github_repo(owner, repo, timeout=10):
    """Fetch summary metadata for a GitHub repository from the REST API.

    NOTE(review): a function with this same name is also imported from
    `src.github_analysis` at the top of the file; this definition rebinds
    the name once it has been executed — confirm which one should be kept.

    Args:
        owner: Login of the user or organisation that owns the repository.
        repo: Name of the repository.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        A dict with the keys "Repository Name", "Owner", "Stars", "Forks",
        "Issues", "Language" and "Description", or None when the request
        fails (the failure is also reported through `st.error`).
    """
    from urllib.parse import quote

    # Both values come from free-text input. Percent-encode them so that
    # characters such as "/" or "?" cannot change which endpoint is hit.
    safe_owner = quote(str(owner), safe="")
    safe_repo = quote(str(repo), safe="")
    api_url = f"https://api.github.com/repos/{safe_owner}/{safe_repo}"

    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the page.
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        repo_data = response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching GitHub repository: {e}")
        return None

    return {
        "Repository Name": repo_data['name'],
        "Owner": repo_data['owner']['login'],
        "Stars": repo_data['stargazers_count'],
        "Forks": repo_data['forks_count'],
        "Issues": repo_data['open_issues_count'],
        "Language": repo_data['language'],
        # The key can be present with a null or empty value, in which case
        # the default argument of `.get` would never be used; `or` covers
        # both the missing and the empty case.
        "Description": repo_data.get('description') or 'No description available.',
    }
| |
|
def fetch_url_title(url, timeout=10):
    """Fetch a web page and return the text of its `<title>` element.

    NOTE(review): a function with this same name is also imported from
    `src.url_fetcher` at the top of the file; this definition rebinds the
    name once it has been executed — confirm which one should be kept.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page title with HTML entities decoded and whitespace collapsed,
        or None when the request fails, the status code is not 200, or the
        page has no non-empty `<title>` element. Request failures and
        non-200 responses are also reported through `st.error`.
    """
    import html
    import re

    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the page.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None

    if response.status_code != 200:
        st.error(f"Failed to fetch URL: {response.status_code}")
        return None

    # A regex is used instead of two `str.find` calls so that a missing tag
    # yields "no match" rather than a slice built from -1 offsets, and so
    # that `<TITLE>` and `<title attr="...">` are recognised as well.
    match = re.search(
        r"<title\b[^>]*>(.*?)</title\s*>",
        response.text,
        re.IGNORECASE | re.DOTALL,
    )
    if match is None:
        return None

    # Decode entities such as "&amp;" and fold line breaks/indentation that
    # often surround the title text into single spaces.
    title = " ".join(html.unescape(match.group(1)).split())
    return title or None
| |
|