# Source code for websweep.config

"""This module provides the WebSweep config functionality."""
import configparser
import shutil
from pathlib import Path
from typing import Optional

import typer

from websweep import DIR_ERROR, FILE_ERROR, SUCCESS, __app_name__

# Application directory for WebSweep, as resolved by ``typer.get_app_dir``
# for ``__app_name__``.
CONFIG_DIR_PATH = Path(typer.get_app_dir(__app_name__))
# Global config file inside that directory; its ``[Instance]`` section stores
# the location of the active WebSweep instance.
CONFIG_FILE_PATH = CONFIG_DIR_PATH / "config.ini"


def _truncate_section(config_file: Path, section: str) -> None:
    """Remove a section from an INI file when it exists.

    The file is parsed, ``section`` is dropped (a no-op when the section is
    absent) and the remaining content is written back in place.

    Args:
        config_file: Path of the INI file to modify.
        section: Name of the section to remove.

    Notes:
        * A missing file is treated as "nothing to remove" and the function
          returns without creating it.
        * Failures while writing the file back are ignored (best effort).
        * Parse errors (``configparser.Error``) still propagate to the caller.
    """
    parser = configparser.ConfigParser()

    try:
        with config_file.open("r") as handle:
            parser.read_file(handle)
    except FileNotFoundError:
        # No file means there is no section to remove.
        return

    parser.remove_section(section)

    try:
        # Always rewrite, even if the section was absent: callers append new
        # sections afterwards and rely on the normalised trailing newline.
        with config_file.open("w") as handle:
            parser.write(handle)
    except OSError:
        # Deliberate best effort: callers report their own write errors
        # through return codes when they append the replacement section.
        pass

def current_websweep_instance() -> Path:
    """Return the folder of the currently active WebSweep instance.

    The location is read from ``[Instance] location`` in the global config
    file. If that entry is missing or the file cannot be parsed, the
    application config directory is returned instead.
    """
    parser = configparser.ConfigParser()
    try:
        parser.read(CONFIG_FILE_PATH)
        location = parser["Instance"]["location"]
    except (KeyError, configparser.Error):
        return CONFIG_DIR_PATH
    return Path(location)
def init_app(
    target_folder_path: str,
    source_file_path: str,
    extractor_delete_files: bool,
    use_database: bool,
    extractor_addon_file: Optional[Path] = None,
) -> int:
    """Initialize the application.

    Runs the set-up steps in order and stops at the first one that fails.

    Args:
        target_folder_path: Folder that becomes the WebSweep instance.
        source_file_path: Location of the source file to record in the settings.
        extractor_delete_files: Extractor clean-up preference to persist.
        use_database: Database preference to persist.
        extractor_addon_file: Optional add-on extractor file to copy into the
            instance folder.

    Returns:
        ``SUCCESS`` when every step succeeded, otherwise the status code of
        the first failing step (``FILE_ERROR`` when the add-on file could not
        be copied).
    """
    instance_dir = Path(target_folder_path)

    # 1. Global config file: create it and point it at this instance.
    status = _init_application_config_file(instance_dir)
    if status != SUCCESS:
        return status

    # 2. Instance folder and its (possibly empty) settings file.
    status = _create_settings_file()
    if status != SUCCESS:
        return status

    # 3. Data folder plus the instance location inside the settings file.
    status = _init_target_folder(instance_dir)
    if status != SUCCESS:
        return status

    # 4. Source file location.
    status = _save_source_file(Path(source_file_path))
    if status != SUCCESS:
        return status

    # 5. Optional add-on extractor, copied into the instance folder.
    addon_copy = None
    if extractor_addon_file is not None:
        addon_copy = _copy_extractor_addon_to_instance(Path(extractor_addon_file))
        if addon_copy is None:
            return FILE_ERROR

    # 6. Extractor settings.
    status = _save_extractor_settings(
        extractor_delete_files=extractor_delete_files,
        extractor_addon_file=addon_copy,
    )
    if status != SUCCESS:
        return status

    # 7. Database preference.
    status = _save_use_database(use_database)
    if status != SUCCESS:
        return status

    return SUCCESS
def _init_application_config_file(location: Path) -> int:
    """Create/update the global config file and store active instance location.

    Args:
        location: Folder of the WebSweep instance to mark as active.

    Returns:
        ``DIR_ERROR`` if the config directory cannot be created,
        ``FILE_ERROR`` if the config file cannot be created or written,
        ``SUCCESS`` otherwise.
    """
    try:
        CONFIG_DIR_PATH.mkdir(exist_ok=True, parents=True)
    except OSError:
        return DIR_ERROR
    try:
        CONFIG_FILE_PATH.touch(exist_ok=True)
    except OSError:
        return FILE_ERROR

    # Drop any previous [Instance] section, then append the fresh one.
    _truncate_section(CONFIG_FILE_PATH, "Instance")
    config_parser = configparser.ConfigParser()
    config_parser.add_section("Instance")
    config_parser.set("Instance", "location", str(location))
    try:
        with CONFIG_FILE_PATH.open("a") as file:
            config_parser.write(file)
    except OSError:
        return FILE_ERROR
    return SUCCESS


def _init_target_folder(target_folder_path: Path) -> int:
    """Initialize instance folders and write the local ``settings.ini`` pointer.

    Args:
        target_folder_path: Folder of the WebSweep instance.

    Returns:
        ``DIR_ERROR`` if the ``crawled_data`` folder cannot be created,
        ``FILE_ERROR`` if ``settings.ini`` cannot be created or written,
        ``SUCCESS`` otherwise.
    """
    settings_file = target_folder_path / "settings.ini"

    try:
        (target_folder_path / "crawled_data").mkdir(exist_ok=True, parents=True)
    except OSError:
        return DIR_ERROR

    # Make sure the settings file exists before its old section is removed,
    # mirroring what is done for the global config file.
    try:
        settings_file.touch(exist_ok=True)
    except OSError:
        return FILE_ERROR

    # Drop any previous [Instance] section, then append the fresh one.
    _truncate_section(settings_file, "Instance")
    config_parser = configparser.ConfigParser()
    config_parser.add_section("Instance")
    config_parser.set("Instance", "location", str(target_folder_path))
    try:
        with settings_file.open("a") as file:
            config_parser.write(file)
    except OSError:
        return FILE_ERROR
    return SUCCESS


def _create_settings_file() -> int:
    """Ensure the active instance has a ``settings.ini`` file.

    Returns:
        ``FILE_ERROR`` if the instance folder or the settings file cannot be
        created, ``SUCCESS`` otherwise.
    """
    try:
        instance_path = current_websweep_instance()
        instance_path.mkdir(exist_ok=True, parents=True)
        (instance_path / "settings.ini").touch(exist_ok=True)
    except OSError:
        return FILE_ERROR
    return SUCCESS
def restore_app(target_folder_path: Path) -> int:
    """Restore existing application.

    Marks an already initialised instance folder as the active instance and
    checks that its settings can be read.

    Args:
        target_folder_path: Folder of an existing WebSweep instance. A plain
            string is accepted as well and converted to a ``Path``.

    Returns:
        ``DIR_ERROR`` if the folder does not exist, ``FILE_ERROR`` if it has
        no ``settings.ini`` or the settings cannot be read, the status code
        of the global config update if that step fails, ``SUCCESS`` otherwise.
    """
    # Normalise once so str and Path inputs behave the same.
    instance_dir = Path(target_folder_path)

    if not instance_dir.is_dir():
        return DIR_ERROR
    if not (instance_dir / "settings.ini").is_file():
        return FILE_ERROR

    # Create the application config file and point it at this instance.
    config_code = _init_application_config_file(instance_dir)
    if config_code != SUCCESS:
        return config_code

    try:
        # The return values are not needed; the calls only verify that the
        # settings of the restored instance are readable.
        get_target_folder_path()
        get_source_file_path()
        get_extractor_delete()
        get_extractor_addon_file()
    except Exception:
        # Validation boundary: any failure while reading the settings is
        # reported to the caller as a file error.
        return FILE_ERROR
    return SUCCESS
def get_target_folder_path(
    config_file: Path = None,
) -> Path:
    """Return the instance location stored in a settings file.

    Args:
        config_file: Settings file to read. Defaults to ``settings.ini`` of
            the currently active WebSweep instance.

    Raises:
        KeyError: If the file has no ``[Instance]`` section or no
            ``location`` entry.
    """
    settings_path = config_file
    if settings_path is None:
        settings_path = current_websweep_instance() / "settings.ini"

    parser = configparser.ConfigParser()
    parser.read(settings_path)
    instance_section = parser["Instance"]
    return Path(instance_section["location"])
def _save_source_file(source_file_path: Path) -> int:
    """Store the source file location in the instance ``settings.ini``.

    Any existing ``[Source]`` section is removed first and a fresh one is
    appended. Returns ``FILE_ERROR`` if the file cannot be written and
    ``SUCCESS`` otherwise.
    """
    settings_file = current_websweep_instance() / "settings.ini"
    _truncate_section(settings_file, "Source")

    parser = configparser.ConfigParser()
    parser.add_section("Source")
    parser.set("Source", "source_file", str(source_file_path))

    try:
        with settings_file.open("a") as handle:
            parser.write(handle)
    except OSError:
        return FILE_ERROR
    return SUCCESS
def get_source_file_path(
    config_file: Path = None,
) -> Path:
    """Return the source file path stored in a settings file.

    Args:
        config_file: Settings file to read. Defaults to ``settings.ini`` of
            the currently active WebSweep instance.

    Returns:
        The stored path, or ``None`` when the ``[Source]`` entry is missing
        or the file cannot be parsed.
    """
    settings_path = config_file
    if settings_path is None:
        settings_path = current_websweep_instance() / "settings.ini"

    parser = configparser.ConfigParser()
    try:
        parser.read(settings_path)
        stored = parser["Source"]["source_file"]
    except (KeyError, configparser.Error):
        return None
    return Path(stored)
def _parse_bool(value, default: bool) -> bool:
    """Parse flexible string/boolean config values with a default fallback.

    Args:
        value: A ``bool``, ``None`` or anything whose string form is one of
            ``1/true/yes/on`` or ``0/false/no/off`` (case-insensitive,
            surrounding whitespace ignored).
        default: Result for ``None`` and for unrecognised values.
    """
    if isinstance(value, bool):
        return value
    if value is None:
        return default
    normalized = str(value).strip().lower()
    if normalized in {"1", "true", "yes", "on"}:
        return True
    if normalized in {"0", "false", "no", "off"}:
        return False
    return default


def _save_extractor_settings(
    extractor_delete_files: bool,
    extractor_addon_file: Optional[Path] = None,
) -> int:
    """Persist extractor-related settings in the ``Extractor`` section.

    Any existing ``[Extractor]`` section of the instance ``settings.ini`` is
    removed and a fresh one is appended. A missing add-on file is stored as
    an empty string.

    Returns:
        ``FILE_ERROR`` if the settings file cannot be written, ``SUCCESS``
        otherwise.
    """
    settings_file = current_websweep_instance() / "settings.ini"
    _truncate_section(settings_file, "Extractor")

    config_parser = configparser.ConfigParser()
    config_parser.add_section("Extractor")
    config_parser.set(
        "Extractor", "extractor_delete_files", str(extractor_delete_files)
    )
    config_parser.set(
        "Extractor",
        "extractor_addon_file",
        "" if extractor_addon_file is None else str(Path(extractor_addon_file)),
    )
    try:
        with settings_file.open("a") as file:
            config_parser.write(file)
    except OSError:
        return FILE_ERROR
    return SUCCESS


def _copy_extractor_addon_to_instance(extractor_addon_file: Path) -> Optional[Path]:
    """Copy add-on extractor file into the active instance folder.

    Args:
        extractor_addon_file: Add-on file to copy; ``~`` is expanded and the
            path is resolved before use.

    Returns:
        The destination path (``extractor_addon.py`` inside the active
        instance folder), or ``None`` if the source is not an existing file
        or the copy fails. If source and destination are already the same
        file, nothing is copied and the destination is returned.
    """
    source = Path(extractor_addon_file).expanduser().resolve()
    if not source.exists() or not source.is_file():
        return None

    destination = current_websweep_instance() / "extractor_addon.py"
    try:
        # Compare against the resolved destination: the instance path may be
        # relative or contain symlinks while ``source`` is always resolved.
        if source != destination.resolve():
            shutil.copy2(source, destination)
    except shutil.SameFileError:
        # Same file reached through another name (e.g. a hard link): the
        # add-on is already in place. Must precede OSError, its base class.
        pass
    except OSError:
        return None
    return destination


def _save_extractor_delete(extractor_delete_files: bool) -> int:
    """Persist extractor cleanup preference in ``settings.ini``.

    The currently configured add-on file is read back and written again so
    that it survives the rewrite of the ``Extractor`` section.
    """
    return _save_extractor_settings(
        extractor_delete_files=extractor_delete_files,
        extractor_addon_file=get_extractor_addon_file(),
    )
def get_extractor_delete(
    config_file: Path = None,
) -> bool:
    """Return whether to delete processed raw files.

    Args:
        config_file: Settings file to read. Defaults to ``settings.ini`` of
            the currently active WebSweep instance.

    Returns:
        The parsed ``[Extractor] extractor_delete_files`` value; ``False``
        when the entry is missing or not recognised as a boolean.
    """
    settings_path = (
        current_websweep_instance() / "settings.ini"
        if config_file is None
        else config_file
    )

    parser = configparser.ConfigParser()
    parser.read(settings_path)
    stored = parser.get("Extractor", "extractor_delete_files", fallback=None)
    return _parse_bool(stored, default=False)
def get_extractor_addon_file(
    config_file: Path = None,
) -> Optional[Path]:
    """Return the configured extractor add-on file path or ``None``.

    Args:
        config_file: Settings file to read. Defaults to ``settings.ini`` of
            the currently active WebSweep instance.

    Returns:
        The ``[Extractor] extractor_addon_file`` value as a path, or ``None``
        when the entry is missing or blank.
    """
    settings_path = (
        current_websweep_instance() / "settings.ini"
        if config_file is None
        else config_file
    )

    parser = configparser.ConfigParser()
    parser.read(settings_path)
    stored = parser.get("Extractor", "extractor_addon_file", fallback="")
    cleaned = str(stored).strip()
    return Path(cleaned) if cleaned else None
def _save_use_database(use_database: bool) -> int:
    """Store the database preference in the instance ``settings.ini``.

    Any existing ``[Database]`` section is removed first and a fresh one is
    appended. Returns ``FILE_ERROR`` if the file cannot be written and
    ``SUCCESS`` otherwise.
    """
    settings_file = current_websweep_instance() / "settings.ini"
    _truncate_section(settings_file, "Database")

    parser = configparser.ConfigParser()
    parser.add_section("Database")
    parser.set("Database", "use_database", str(use_database))

    try:
        with settings_file.open("a") as handle:
            parser.write(handle)
    except OSError:
        return FILE_ERROR
    return SUCCESS
def get_use_database(
    config_file: Path = None,
) -> bool:
    """Return whether overview data should use a database backend.

    Args:
        config_file: Settings file to read. Defaults to ``settings.ini`` of
            the currently active WebSweep instance.

    Returns:
        The parsed ``[Database] use_database`` value; ``True`` when the entry
        is missing or not recognised as a boolean.
    """
    settings_path = (
        current_websweep_instance() / "settings.ini"
        if config_file is None
        else config_file
    )

    parser = configparser.ConfigParser()
    parser.read(settings_path)
    stored = parser.get("Database", "use_database", fallback=None)
    return _parse_bool(stored, default=True)