Scanner API Reference

Scanner for finding trace tags in source code files.

scan_directory(directory)

Recursively scans a directory for trace tags, respecting .gitignore. Returns the list of trace matches and a mapping from each scanned file path to its (line count, is_disabled) pair.

Source code in reqtrace/analysis/scanner.py
def scan_directory(directory: Union[str, Path]) -> Tuple[List[TraceMatch], Dict[str, Tuple[int, bool]]]:
    """Walk ``directory`` recursively and collect trace tags from every non-ignored file.

    Directories and files matched by the ignore spec obtained from
    ``_get_ignore_spec`` are skipped, and ``.gitignore`` files themselves are
    never scanned.

    Args:
        directory: Root directory to scan.

    Returns:
        A pair ``(matches, file_info)``. ``matches`` is the combined list of
        trace matches from all scanned files. ``file_info`` maps each scanned
        file path (as a string) to ``(line_count, is_disabled)`` as reported
        by ``scan_file``.

    Raises:
        ValueError: If ``directory`` is not a directory.
    """
    # @trace-start: REQ-SCAN-DIR
    base = Path(directory)
    if not base.is_dir():
        raise ValueError(f"'{base}' is not a directory")

    ignore_spec = _get_ignore_spec(base)
    collected = []
    per_file: Dict[str, Tuple[int, bool]] = {}

    for current, subdirs, filenames in os.walk(base):
        current_path = Path(current)

        # Prune ignored directories in place so the walk does not descend into
        # them. Paths are made relative to the base directory and given a
        # trailing slash before being tested against the spec.
        kept = []
        for name in subdirs:
            rel_dir = str((current_path / name).relative_to(base)) + "/"
            if not ignore_spec.match_file(rel_dir):
                kept.append(name)
        subdirs[:] = kept

        for name in filenames:
            # .gitignore files are configuration, not source to be counted.
            if name == ".gitignore":
                continue

            candidate = current_path / name
            if ignore_spec.match_file(str(candidate.relative_to(base))):
                continue

            found, line_count, disabled = scan_file(candidate)
            collected.extend(found)
            per_file[str(candidate)] = (line_count, disabled)

    # @trace-end: REQ-SCAN-DIR
    return collected, per_file

scan_file(filepath)

Scans a single text file for requirement trace tags. Returns the trace matches, the total line count, and whether the file is marked as disabled.

Source code in reqtrace/analysis/scanner.py
def scan_file(filepath: Union[str, Path]) -> Tuple[List[TraceMatch], int, bool]:
    """Scan one text file for paired start/end requirement trace tags.

    The file is read line by line as UTF-8. Each start tag opens a trace for
    its requirement id, and a later end tag with the same id closes it and
    yields one ``TraceMatch``. A start tag whose id is already open replaces
    the earlier start. End tags with no open start are ignored, and traces
    that are never closed yield nothing.

    Args:
        filepath: Path of the file to scan.

    Returns:
        A triple ``(matches, total_lines, is_disabled)``. ``total_lines`` is
        the number of the last line read. When the disable tag is found
        anywhere in the file, ``matches`` is an empty list and ``is_disabled``
        is ``True``. If a ``UnicodeDecodeError`` occurs, reading stops and
        whatever was gathered up to that point is returned.
    """
    # @trace-start: REQ-SCAN-FILE
    # @trace-start: REQ-SCAN-REGEX
    source = Path(filepath)
    found = []
    pending = {}  # requirement id -> (line of start tag, optional percentage)
    line_count = 0
    disabled = False

    try:
        with open(source, "r", encoding="utf-8") as handle:
            for number, text in enumerate(handle, start=1):
                line_count = number

                # Keep reading after the file is flagged as disabled so the
                # line count stays complete; the matches are dropped on return.
                if not disabled and DISABLE_TAG_PATTERN.search(text):
                    disabled = True

                for opening in START_TAG_PATTERN.finditer(text):
                    tag_id = opening.group(1)
                    raw_pct = opening.group(2)
                    pct = int(raw_pct) if raw_pct else None
                    pending[tag_id] = (number, pct)

                for closing in END_TAG_PATTERN.finditer(text):
                    tag_id = closing.group(1)
                    if tag_id not in pending:
                        continue
                    opened_at, pct = pending.pop(tag_id)
                    found.append(
                        TraceMatch(
                            file_path=str(source),
                            line_start=opened_at,
                            line_end=number,
                            req_id=tag_id,
                            percentage=pct,
                        )
                    )
    except UnicodeDecodeError:
        # Binary files or files in another encoding are deliberately tolerated.
        pass

    # @trace-end: REQ-SCAN-REGEX
    # @trace-end: REQ-SCAN-FILE
    return ([] if disabled else found), line_count, disabled