diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..4a96c22 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +source .venv/bin/activate \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f999880..f4e5d27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.1] - 2025-07-14 + +### Changed +- **BREAKING CHANGE**: Updated Python version requirement from `>=3.12` to `==3.10.*` for improved backwards compatibility +- Enhanced backwards compatibility by supporting Python 3.10 environments commonly used in enterprise and CI/CD systems + +### Fixed +- Fixed Python version compatibility issue that was unnecessarily blocking installation on Python 3.10 systems +- Resolved adoption barriers for users on older but still supported Python versions + +### Technical Notes +- All codebase features are fully compatible with Python 3.10 (ast.unparse, built-in generics, type hints) +- No Python 3.11+ or 3.12+ specific features are used in the implementation +- All dependencies support Python 3.10+ + +## [0.2.0] - 2025-07-11 + +### Changed +- **BREAKING CHANGE**: Renamed `AnalyzerCore` class to `Codeanalyzer` for better library naming consistency +- Refactored core class to support direct library import: `from codeanalyzer import Codeanalyzer` +- Updated all internal references and documentation to use the new class name +- Enhanced library interface for programmatic usage while maintaining CLI compatibility + +### Added +- Direct library import support allowing users to import and use `Codeanalyzer` as a library +- Proper `__all__` export in `__init__.py` for clean package interface + ## [0.1.5] - 2025-07-11 ### Fixed diff --git a/RELEASE.md b/RELEASE.md deleted file mode 100644 index 
b16aed8..0000000 --- a/RELEASE.md +++ /dev/null @@ -1,146 +0,0 @@ -## 🎉 Codeanalyzer Python v0.1.0 Release - -**Python Static Analysis Backend for CodeLLM DevKit (CLDK)** - -Initial release of **Codeanalyzer Python**: A comprehensive static analysis tool designed specifically as the Python backend for the CodeLLM DevKit ecosystem. This tool provides deep code understanding capabilities through symbol table generation, with future support for call graph analysis and semantic analysis using industry-standard tools. - -### 🚀 Key Features - -#### **Symbol Table Generation** -- **Complete AST Analysis**: Extracts classes, functions, variables, imports, and comments from Python source code -- **Type Inference**: Leverages Jedi for intelligent type inference and symbol resolution -- **Rich Metadata**: Captures cyclomatic complexity, parameter details, call sites, and code structure -- **Comprehensive Coverage**: Supports modules, classes, functions, variables, imports, and docstrings - -#### **Smart Project Processing** -- **Intelligent File Discovery**: Automatically excludes virtual environments, site-packages, and cache directories -- **Progress Tracking**: Beautiful Rich-based progress bars with real-time feedback -- **Error Resilience**: Continues processing on individual file failures with detailed error reporting -- **Caching Support**: Efficient caching system with customizable cache directories - -#### **Modern CLI Interface** -- **Rich Terminal UI**: Beautiful, colorful output with Rich integration -- **Flexible Logging**: Multiple verbosity levels (`-v`, `-vv`, `-vvv`) with structured logging -- **Multiple Output Formats**: JSON output to stdout or file -- **Comprehensive Options**: Eager/lazy analysis, cache management, and output control - -### 🛠️ Technical Highlights - -#### **Built with Modern Python** -- **Python 3.12+**: Leverages latest Python features and type hints -- **uv Package Manager**: Fast, reliable dependency management -- **Pydantic 
Models**: Type-safe data structures with validation -- **Rich Progress Bars**: Non-blocking progress indication that preserves log output - -#### **Advanced Code Analysis** -- **Jedi Integration**: Professional-grade code intelligence and type inference -- **AST Processing**: Deep abstract syntax tree analysis -- **Builder Pattern**: Fluent, type-safe object construction -- **Comprehensive Schema**: Detailed Python code representation models - -#### **Production Ready** -- **Error Handling**: Graceful failure handling with detailed logging -- **Memory Efficient**: Processes large codebases without memory issues -- **Configurable**: Extensive customization options for different use cases -- **Well Tested**: Comprehensive test suite with CLI testing - -### 📋 Usage Examples - -**Basic Symbol Table Generation:** -```bash -uv run codeanalyzer --input ./my-python-project -``` - -**Save Results to File:** -```bash -uv run codeanalyzer --input ./project --output ./analysis-results -``` - -**Verbose Analysis with Custom Cache:** -```bash -uv run codeanalyzer --input ./project -vv --cache-dir ./custom-cache --eager -``` - -### 🔧 Installation - -```bash -# Clone the repository -git clone https://github.com/codellm-devkit/codeanalyzer-python -cd codeanalyzer-python - -# Install with uv -uv sync --all-groups - -# Run analysis -uv run codeanalyzer --input /path/to/your/project -``` - -### 🎯 What's Included - -#### **Core Modules** -- **`SymbolTableBuilder`**: Main analysis engine with comprehensive Python code parsing -- **`ProgressBar`**: Smart progress indication that respects logging levels -- **`PySchema`**: Rich data models for representing Python code structures -- **`AnalyzerCore`**: Central orchestration with caching and virtual environment support - -#### **Advanced Features** -- **Virtual Environment Detection**: Automatic Python environment discovery and setup -- **CodeQL Integration**: Foundation for future semantic analysis (in development) -- **Extensible 
Architecture**: Modular design ready for additional analysis backends - -### 🔮 Future Roadmap - -#### **Planned Features** -- **Call Graph Analysis** (`--analysis-level 2`): Complete function call relationship mapping -- **CodeQL Semantic Analysis**: Advanced code pattern detection and vulnerability analysis -- **WALA Integration**: Additional semantic analysis capabilities -- **Performance Optimizations**: Parallel processing and incremental analysis - -### 🏗️ Architecture Improvements in v0.1.0 - -#### **Logging System Overhaul** -- **Replaced Loguru with Rich Logging**: Better terminal integration and formatting -- **Centralized Logger**: Consistent logging across all modules -- **Progress-Aware Logging**: Error messages don't interfere with progress bars - -#### **Progress Bar Enhancement** -- **Rich Integration**: Beautiful, informative progress indication -- **Logger-Aware**: Automatically disables when logging level is high -- **Error Collection**: Batches error messages to display after progress completion - -#### **Dependency Management** -- **Switched from tqdm to Rich**: Unified UI framework -- **Cleaner Dependencies**: Removed redundant packages -- **Better Error Handling**: More robust dependency resolution - -### 🧪 Quality Assurance - -#### **Testing Infrastructure** -- **CLI Testing**: Comprehensive command-line interface validation -- **Symbol Table Testing**: Verification of analysis accuracy -- **Error Handling Tests**: Robust failure mode testing - -#### **Code Quality** -- **Type Safety**: Full type hints with mypy compatibility -- **Modern Python**: Leverages Python 3.12+ features -- **Clean Architecture**: Modular, testable design patterns - -### 🎊 Perfect for CodeLLM DevKit - -This release establishes CodeAnalyzer Python as the foundational static analysis backend for the CodeLLM DevKit ecosystem, providing: - -- **Structured Code Representation**: Rich JSON output perfect for LLM consumption -- **Comprehensive Metadata**: All 
the context needed for intelligent code understanding -- **Extensible Design**: Ready to integrate with additional CLDK tools and workflows -- **Production Scalability**: Handles enterprise-scale Python codebases efficiently - -### 📖 Documentation & Support - -- **Comprehensive README**: Detailed installation and usage instructions -- **Rich CLI Help**: Built-in help system with examples -- **Type-Safe APIs**: Full type hints for IDE integration -- **Open Source**: Apache 2.0 license with community contributions welcome - ---- - -*For issues, feature requests, or contributions, visit our [GitHub repository](https://github.com/codellm-devkit/codeanalyzer-python).* diff --git a/codeanalyzer/__init__.py b/codeanalyzer/__init__.py index e69de29..2f8688d 100644 --- a/codeanalyzer/__init__.py +++ b/codeanalyzer/__init__.py @@ -0,0 +1,5 @@ +"""Python code analyzer library.""" + +from codeanalyzer.core import Codeanalyzer + +__all__ = ["Codeanalyzer"] diff --git a/codeanalyzer/__main__.py b/codeanalyzer/__main__.py index a747b2c..ab400e3 100644 --- a/codeanalyzer/__main__.py +++ b/codeanalyzer/__main__.py @@ -1,16 +1,11 @@ from pathlib import Path from typing import Annotated, Optional -from enum import Enum import typer -from codeanalyzer.core import AnalyzerCore +from codeanalyzer.core import Codeanalyzer from codeanalyzer.utils import _set_log_level, logger - - -class OutputFormat(str, Enum): - JSON = "json" - MSGPACK = "msgpack" +from codeanalyzer.config import OutputFormat def main( @@ -67,7 +62,7 @@ def main( logger.error(f"Input path '{input}' does not exist.") raise typer.Exit(code=1) - with AnalyzerCore( + with Codeanalyzer( input, analysis_level, using_codeql, rebuild_analysis, cache_dir, clear_cache ) as analyzer: artifacts = analyzer.analyze() diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py index a5e39ae..b135584 100644 --- a/codeanalyzer/core.py +++ b/codeanalyzer/core.py @@ -13,7 +13,7 @@ from codeanalyzer.utils import logger -class AnalyzerCore: 
+class Codeanalyzer: """Core functionality for CodeQL analysis. Args: @@ -196,7 +196,7 @@ def _get_base_interpreter() -> Path: f"a working Python interpreter that can create virtual environments." ) - def __enter__(self) -> "AnalyzerCore": + def __enter__(self) -> "Codeanalyzer": # If no virtualenv is provided, try to create one using requirements.txt or pyproject.toml venv_path = self.cache_dir / self.project_dir.name / "virtualenv" # Ensure the cache directory exists for this project diff --git a/codeanalyzer/syntactic_analysis/symbol_table_builder.py b/codeanalyzer/syntactic_analysis/symbol_table_builder.py index a60a68e..135c206 100644 --- a/codeanalyzer/syntactic_analysis/symbol_table_builder.py +++ b/codeanalyzer/syntactic_analysis/symbol_table_builder.py @@ -503,7 +503,9 @@ def build_param( return params - def _accessed_symbols(self, fn_node: ast.FunctionDef, script: Script) -> List[str]: + def _accessed_symbols( + self, fn_node: ast.FunctionDef, script: Script + ) -> List[PySymbol]: """Analyzes the function body to extract all accessed symbols.""" symbols = [] for node in ast.walk(fn_node): diff --git a/pyproject.toml b/pyproject.toml index ea5edf2..0f5e34a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,18 @@ [project] name = "codeanalyzer-python" -version = "0.1.5" +version = "0.1.6" description = "Static Analysis on Python source code using Jedi, CodeQL and Treesitter." readme = "README.md" authors = [ { name = "Rahul Krishna", email = "i.m.ralk@gmail.com" } ] -requires-python = ">=3.12" +requires-python = "==3.10.*" dependencies = [ "jedi>=0.19.2", "loguru>=0.7.3", "msgpack>=1.1.1", - "networkx>=3.5", + "networkx>=3.4.2", "pandas>=2.3.1", "pydantic>=2.11.7", "requests>=2.32.4", @@ -86,4 +86,4 @@ exclude_lines = [ ] [tool.coverage.html] -directory = "htmlcov" \ No newline at end of file +directory = "htmlcov"