From 752d2d9ee95cf3c88a6885792731fafc2393ec6d Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 14:46:02 +0200
Subject: [PATCH 1/6] style(scripts): apply Ruff formatter and import sorting

---
 krakenparser/__init__.py                 |  2 +-
 krakenparser/counts/convert2csv.py       |  1 +
 krakenparser/counts/processing_script.py |  5 +--
 krakenparser/counts/split_mpa.py         | 30 ++++++++------
 krakenparser/kpplot/base.py              |  7 ++--
 krakenparser/kpplot/clustermap.py        |  6 ++-
 krakenparser/kpplot/stackedbar.py        |  8 ++--
 krakenparser/kpplot/streamgraph.py       |  8 ++--
 krakenparser/krakenparser.py             | 35 ++++++++++------
 krakenparser/mpa/mpa_table.py            | 13 ++++--
 krakenparser/mpa/transform2mpa.py        | 52 ++++++++++++++++--------
 krakenparser/pipeline.py                 | 43 +++++++++++++-------
 krakenparser/stats/diversity.py          | 39 +++++++++++++-----
 krakenparser/stats/relabund.py           |  7 ++--
 tests/conftest.py                        | 38 ++++++++++-------
 tests/test_full_pipeline.py              |  5 ++-
 tests/test_integration.py                | 25 ++++++++----
 tests/test_kpplot.py                     | 19 +++++----
 tests/test_units.py                      | 13 +++---
 19 files changed, 226 insertions(+), 130 deletions(-)

diff --git a/krakenparser/__init__.py b/krakenparser/__init__.py
index 1c1f724..2905ccb 100755
--- a/krakenparser/__init__.py
+++ b/krakenparser/__init__.py
@@ -1,6 +1,6 @@
+from .kpplot.clustermap import clustermap
 from .kpplot.stackedbar import stacked_barplot
 from .kpplot.streamgraph import streamgraph
-from .kpplot.clustermap import clustermap
 
 __all__ = [
     "stacked_barplot",
diff --git a/krakenparser/counts/convert2csv.py b/krakenparser/counts/convert2csv.py
index 835ffc3..1177b91 100755
--- a/krakenparser/counts/convert2csv.py
+++ b/krakenparser/counts/convert2csv.py
@@ -3,6 +3,7 @@
 import argparse
 import logging
 from pathlib import Path
+
 import pandas as pd
 
 _log = logging.getLogger(__name__)
diff --git a/krakenparser/counts/processing_script.py b/krakenparser/counts/processing_script.py
index b2b28ff..831910e 100755
--- a/krakenparser/counts/processing_script.py
+++ b/krakenparser/counts/processing_script.py
@@ -1,9 +1,8 @@
 #!/usr/bin/env python
 
+import argparse
 import os
-import sys
 import tempfile
-import argparse
 from pathlib import Path
 
 
@@ -11,7 +10,7 @@ def modify_taxa_names(line):
     prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
     for prefix in prefixes:
         if line.startswith(prefix):
-            parts = line[len(prefix):].split("\t")
+            parts = line[len(prefix) :].split("\t")
             parts[0] = parts[0].replace("_", " ")
             return "\t".join(parts)
     return line
diff --git a/krakenparser/counts/split_mpa.py b/krakenparser/counts/split_mpa.py
index 72b4e07..0ee87b8 100644
--- a/krakenparser/counts/split_mpa.py
+++ b/krakenparser/counts/split_mpa.py
@@ -7,7 +7,6 @@
 import argparse
 import logging
 import re
-import sys
 from pathlib import Path
 
 _log = logging.getLogger(__name__)
@@ -15,20 +14,20 @@
 
 _RANKS = [
     ("species", "s__", []),
-    ("genus",   "g__", ["s__"]),
-    ("family",  "f__", ["s__", "g__"]),
-    ("order",   "o__", ["s__", "g__", "f__"]),
-    ("class",   "c__", ["s__", "g__", "f__", "o__"]),
-    ("phylum",  "p__", ["s__", "g__", "f__", "o__", "c__"]),
+    ("genus", "g__", ["s__"]),
+    ("family", "f__", ["s__", "g__"]),
+    ("order", "o__", ["s__", "g__", "f__"]),
+    ("class", "c__", ["s__", "g__", "f__", "o__"]),
+    ("phylum", "p__", ["s__", "g__", "f__", "o__", "c__"]),
 ]
 
 _HUMAN_TAXA = {
     "species": "s__Homo_sapiens",
-    "genus":   "g__Homo",
-    "family":  "f__Hominidae",
-    "order":   "o__Primates",
-    "class":   "c__Mammalia",
-    "phylum":  "p__Chordata",
+    "genus": "g__Homo",
+    "family": "f__Hominidae",
+    "order": "o__Primates",
+    "class": "c__Mammalia",
+    "phylum": "p__Chordata",
 }
 
 _ACCESSION_RE = re.compile(r"(SRS|SRR|SRX|ERS|ERR|ERX|DRS|DRR|DRX)\d*-")
@@ -41,7 +40,7 @@ def _strip_path_prefix(line: str) -> str:
         return line
     path, rest = line[:tab], line[tab:]
     pipe = path.rfind("|")
-    segment = path[pipe + 1:] if pipe != -1 else path
+    segment = path[pipe + 1 :] if pipe != -1 else path
     return _ACCESSION_RE.sub("", segment + rest)
 
 
@@ -105,7 +104,12 @@ def main() -> None:
         help="Do not filter human-related taxa (default: filtered)",
     )
     args = parser.parse_args()
-    split_mpa(args.input, args.output, viruses_only=args.viruses_only, keep_human=args.keep_human)
+    split_mpa(
+        args.input,
+        args.output,
+        viruses_only=args.viruses_only,
+        keep_human=args.keep_human,
+    )
 
 
 if __name__ == "__main__":
diff --git a/krakenparser/kpplot/base.py b/krakenparser/kpplot/base.py
index 2df3f0c..8d96aba 100644
--- a/krakenparser/kpplot/base.py
+++ b/krakenparser/kpplot/base.py
@@ -1,6 +1,7 @@
+from typing import Optional
+
 import matplotlib.pyplot as plt
 import pandas as pd
-from typing import Optional
 
 
 class KpPlotBase:
@@ -44,9 +45,7 @@ def aggregate_by_metadata(
         raise ValueError("metadata must contain 'Sample_id' column")
     if metadata_group not in metadata.columns:
         raise ValueError(f"'{metadata_group}' column not found in metadata")
-    df = df.merge(
-        metadata[["Sample_id", metadata_group]], on="Sample_id", how="left"
-    )
+    df = df.merge(metadata[["Sample_id", metadata_group]], on="Sample_id", how="left")
     df = (
         df.groupby([metadata_group, "taxon"], as_index=False)["rel_abund_perc"]
         .mean()
diff --git a/krakenparser/kpplot/clustermap.py b/krakenparser/kpplot/clustermap.py
index f0ad5bb..704bb73 100644
--- a/krakenparser/kpplot/clustermap.py
+++ b/krakenparser/kpplot/clustermap.py
@@ -1,7 +1,9 @@
-import pandas as pd
+from typing import List, Optional, Tuple
+
 import matplotlib.pyplot as plt
+import pandas as pd
 import seaborn as sns
-from typing import Optional, Tuple, Union, List
+
 from .base import KpPlotBase, aggregate_by_metadata
 
 
diff --git a/krakenparser/kpplot/stackedbar.py b/krakenparser/kpplot/stackedbar.py
index 8140437..f86cbcc 100644
--- a/krakenparser/kpplot/stackedbar.py
+++ b/krakenparser/kpplot/stackedbar.py
@@ -1,8 +1,10 @@
-import pandas as pd
+from typing import List, Optional, Tuple, Union
+
 import matplotlib.pyplot as plt
-import seaborn as sns
 import numpy as np
-from typing import Optional, Tuple, Union, List
+import pandas as pd
+import seaborn as sns
+
 from .base import KpPlotBase, aggregate_by_metadata
 
 
diff --git a/krakenparser/kpplot/streamgraph.py b/krakenparser/kpplot/streamgraph.py
index 6c389a2..f858be0 100644
--- a/krakenparser/kpplot/streamgraph.py
+++ b/krakenparser/kpplot/streamgraph.py
@@ -1,8 +1,10 @@
-import pandas as pd
+from typing import List, Optional, Tuple, Union
+
 import matplotlib.pyplot as plt
-import seaborn as sns
 import numpy as np
-from typing import Optional, Tuple, Union, List
+import pandas as pd
+import seaborn as sns
+
 from .base import KpPlotBase, aggregate_by_metadata
 
 
diff --git a/krakenparser/krakenparser.py b/krakenparser/krakenparser.py
index 6a560ce..28e94bf 100755
--- a/krakenparser/krakenparser.py
+++ b/krakenparser/krakenparser.py
@@ -1,9 +1,11 @@
 import argparse
 import logging
 import subprocess
-from pathlib import Path
 import sys
-from importlib.metadata import version as _pkg_version, PackageNotFoundError as _PNF
+from importlib.metadata import PackageNotFoundError as _PNF
+from importlib.metadata import version as _pkg_version
+from pathlib import Path
+
 try:
     __version__ = _pkg_version("krakenparser")
 except _PNF:
@@ -78,15 +80,18 @@ def main():
 
     # Map flags to (script_path, base_args_to_prepend)
     command_map = {
-        "complete":           (package_dir / "pipeline.py",                      []),
-        "kreport2mpa":        (package_dir / "mpa" / "transform2mpa.py",         []),
-        "combine_mpa":        (package_dir / "mpa" / "mpa_table.py",             []),
-        "deconstruct":        (package_dir / "counts" / "split_mpa.py",          []),
-        "deconstruct_viruses":(package_dir / "counts" / "split_mpa.py",          ["--viruses-only"]),
-        "process":            (package_dir / "counts" / "processing_script.py",  []),
-        "txt2csv":            (package_dir / "counts" / "convert2csv.py",        []),
-        "relabund":           (package_dir / "stats" / "relabund.py",            []),
-        "diversity":          (package_dir / "stats" / "diversity.py",           []),
+        "complete": (package_dir / "pipeline.py", []),
+        "kreport2mpa": (package_dir / "mpa" / "transform2mpa.py", []),
+        "combine_mpa": (package_dir / "mpa" / "mpa_table.py", []),
+        "deconstruct": (package_dir / "counts" / "split_mpa.py", []),
+        "deconstruct_viruses": (
+            package_dir / "counts" / "split_mpa.py",
+            ["--viruses-only"],
+        ),
+        "process": (package_dir / "counts" / "processing_script.py", []),
+        "txt2csv": (package_dir / "counts" / "convert2csv.py", []),
+        "relabund": (package_dir / "stats" / "relabund.py", []),
+        "diversity": (package_dir / "stats" / "diversity.py", []),
     }
 
     if "-h" in sys.argv or "--help" in sys.argv:
@@ -94,7 +99,9 @@ def main():
             parser.print_help()
             return
 
-    def _build_cmd(script: Path, base_args: list[str], user_args: list[str]) -> list[str]:
+    def _build_cmd(
+        script: Path, base_args: list[str], user_args: list[str]
+    ) -> list[str]:
         if script.suffix == ".py":
             # Run as module (-m) so the krakenparser package stays importable.
             # Derive dotted module name from path relative to the package root.
@@ -113,7 +120,9 @@ def _build_cmd(script: Path, base_args: list[str], user_args: list[str]) -> list
     # Default to full pipeline when -i/--input is given without a subcommand
     if "-i" in extra_args or "--input" in extra_args:
         complete_script, complete_base = command_map["complete"]
-        subprocess.run(_build_cmd(complete_script, complete_base, extra_args), check=True)
+        subprocess.run(
+            _build_cmd(complete_script, complete_base, extra_args), check=True
+        )
         return
 
     parser.print_help()
diff --git a/krakenparser/mpa/mpa_table.py b/krakenparser/mpa/mpa_table.py
index f66e63b..c808383 100644
--- a/krakenparser/mpa/mpa_table.py
+++ b/krakenparser/mpa/mpa_table.py
@@ -53,13 +53,18 @@ def main() -> None:
         description="Combine MPA files into a single tab-delimited table."
     )
     parser.add_argument(
-        "-i", "--input",
-        required=True, nargs="+", dest="in_files",
+        "-i",
+        "--input",
+        required=True,
+        nargs="+",
+        dest="in_files",
         help="Input MPA files (one per sample)",
     )
     parser.add_argument(
-        "-o", "--output",
-        required=True, dest="o_file",
+        "-o",
+        "--output",
+        required=True,
+        dest="o_file",
         help="Output merged MPA file",
     )
     args = parser.parse_args()
diff --git a/krakenparser/mpa/transform2mpa.py b/krakenparser/mpa/transform2mpa.py
index 480a1f9..a28cde6 100644
--- a/krakenparser/mpa/transform2mpa.py
+++ b/krakenparser/mpa/transform2mpa.py
@@ -8,8 +8,14 @@
 
 # Maps Kraken2 single-letter rank codes to MPA prefixes
 _RANK_PREFIX = {
-    "D": "d", "K": "k", "P": "p", "C": "c",
-    "O": "o", "F": "f", "G": "g", "S": "s",
+    "D": "d",
+    "K": "k",
+    "P": "p",
+    "C": "c",
+    "O": "o",
+    "F": "f",
+    "G": "g",
+    "S": "s",
 }
 
 
@@ -96,9 +102,7 @@ def kreport_to_mpa(
 
             # Build the full MPA path; omit intermediate (x__) segments when not requested
             path = "|".join(
-                seg
-                for (_, seg, std) in stack
-                if include_intermediate or std
+                seg for (_, seg, std) in stack if include_intermediate or std
             )
 
             value = str(cum_reads) if use_reads else str(pct)
@@ -112,55 +116,71 @@ def main() -> None:
 
     mode = parser.add_mutually_exclusive_group(required=True)
     mode.add_argument(
-        "-r", "--report-file", "--report",
+        "-r",
+        "--report-file",
+        "--report",
         dest="r_file",
         help="Single input Kraken2 report file",
     )
     mode.add_argument(
-        "-i", "--input",
+        "-i",
+        "--input",
         dest="input_dir",
         help="Input directory containing Kraken2 report files (batch mode)",
     )
 
     parser.add_argument(
-        "-o", "--output",
-        required=True, dest="o_file",
+        "-o",
+        "--output",
+        required=True,
+        dest="o_file",
         help="Output MPA file (single mode) or output directory (batch mode)",
     )
     parser.add_argument(
         "--display-header",
-        action="store_true", dest="add_header", default=False,
+        action="store_true",
+        dest="add_header",
+        default=False,
         help="Write a header line with the sample name (filename)",
     )
     parser.add_argument(
         "--read_count",
-        action="store_true", dest="use_reads", default=True,
+        action="store_true",
+        dest="use_reads",
+        default=True,
         help="Output clade read counts [default]",
     )
     parser.add_argument(
         "--percentages",
-        action="store_false", dest="use_reads",
+        action="store_false",
+        dest="use_reads",
         help="Output percentages instead of read counts",
     )
     parser.add_argument(
         "--intermediate-ranks",
-        action="store_true", dest="x_include", default=False,
+        action="store_true",
+        dest="x_include",
+        default=False,
         help="Include non-standard taxonomic ranks in output",
     )
     parser.add_argument(
         "--no-intermediate-ranks",
-        action="store_false", dest="x_include",
+        action="store_false",
+        dest="x_include",
         help="Exclude non-standard taxonomic ranks [default]",
     )
     group = parser.add_mutually_exclusive_group()
     group.add_argument(
         "--remove-spaces",
-        action="store_true", dest="remove_spaces", default=True,
+        action="store_true",
+        dest="remove_spaces",
+        default=True,
         help="Replace spaces with underscores in taxon names [default]",
     )
     group.add_argument(
         "--keep-spaces",
-        action="store_false", dest="remove_spaces",
+        action="store_false",
+        dest="remove_spaces",
         help="Keep spaces in taxon names",
     )
     args = parser.parse_args()
diff --git a/krakenparser/pipeline.py b/krakenparser/pipeline.py
index 02ca6a7..c54b4e9 100644
--- a/krakenparser/pipeline.py
+++ b/krakenparser/pipeline.py
@@ -11,13 +11,13 @@
 
 import pandas as pd
 
-from krakenparser.mpa.transform2mpa import kreport_to_mpa
-from krakenparser.mpa.mpa_table import combine_mpa
-from krakenparser.counts.split_mpa import split_mpa
-from krakenparser.counts.processing_script import process_files
 from krakenparser.counts.convert2csv import convert_to_csv
-from krakenparser.stats.relabund import calculate_rel_abund
+from krakenparser.counts.processing_script import process_files
+from krakenparser.counts.split_mpa import split_mpa
+from krakenparser.mpa.mpa_table import combine_mpa
+from krakenparser.mpa.transform2mpa import kreport_to_mpa
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
+from krakenparser.stats.relabund import calculate_rel_abund
 
 
 def _is_processable(path: Path) -> bool:
@@ -124,36 +124,49 @@ def run_pipeline(
 
 def main() -> None:
     logging.basicConfig(level=logging.INFO, format="%(message)s")
-    parser = argparse.ArgumentParser(
-        description="Run the full KrakenParser pipeline."
-    )
+    parser = argparse.ArgumentParser(description="Run the full KrakenParser pipeline.")
     parser.add_argument(
-        "-i", "--input", required=True,
+        "-i",
+        "--input",
+        required=True,
         help="Directory containing Kraken2 report files",
     )
     parser.add_argument(
-        "-o", "--output", default=None,
+        "-o",
+        "--output",
+        default=None,
         help="Output directory (default: parent of input)",
     )
     parser.add_argument(
-        "--keep-human", action="store_true", default=False,
+        "--keep-human",
+        action="store_true",
+        default=False,
         help="Do not filter human-related taxa (default: filtered)",
     )
     parser.add_argument(
-        "-d", "--depth", type=int, default=1000,
+        "-d",
+        "--depth",
+        type=int,
+        default=1000,
         help="Rarefaction depth for β-diversity (default: 1000)",
     )
     parser.add_argument(
-        "-s", "--seed", type=int, default=None,
+        "-s",
+        "--seed",
+        type=int,
+        default=None,
         help="Random seed for reproducible rarefaction (default: random)",
     )
     parser.add_argument(
-        "--overwrite", action="store_true", default=False,
+        "--overwrite",
+        action="store_true",
+        default=False,
         help="Overwrite the output directory if it already exists",
     )
     args = parser.parse_args()
     run_pipeline(
-        args.input, args.output,
+        args.input,
+        args.output,
         keep_human=args.keep_human,
         rarefaction_depth=args.depth,
         seed=args.seed,
diff --git a/krakenparser/stats/diversity.py b/krakenparser/stats/diversity.py
index ac3c87f..dd76098 100644
--- a/krakenparser/stats/diversity.py
+++ b/krakenparser/stats/diversity.py
@@ -34,7 +34,9 @@ def chao1_index(counts):
     return S_obs + (F1 * F1) / (2 * F2)
 
 
-def _subsample_counts(counts: np.ndarray, n: int, rng: np.random.Generator) -> np.ndarray:
+def _subsample_counts(
+    counts: np.ndarray, n: int, rng: np.random.Generator
+) -> np.ndarray:
     """Rarefy counts to n reads by sampling without replacement."""
     indices = np.repeat(np.arange(len(counts)), counts)
     sampled = rng.choice(indices, size=n, replace=False)
@@ -76,11 +78,13 @@ def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
 
     bray_df = pd.DataFrame(
         squareform(pdist(X, metric="braycurtis")),
-        index=sample_ids, columns=sample_ids,
+        index=sample_ids,
+        columns=sample_ids,
     )
     jaccard_df = pd.DataFrame(
         squareform(pdist(X.astype(bool).astype(float), metric="jaccard")),
-        index=sample_ids, columns=sample_ids,
+        index=sample_ids,
+        columns=sample_ids,
     )
 
     bray_df.to_csv(output_path / "beta_div_bray.csv")
@@ -89,14 +93,27 @@ def calc_beta_div(df, output_path, rarefaction_depth, seed=None):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Calculate α & β-diversities.")
-    parser.add_argument("-i", "--input", required=True,
-                        help="Input total count table CSV (species level).")
-    parser.add_argument("-o", "--output", required=True,
-                        help="Output directory path.")
-    parser.add_argument("-d", "--depth", type=int, default=1000,
-                        help="Rarefaction depth for β diversity (default: 1000).")
-    parser.add_argument("-s", "--seed", type=int, default=None,
-                        help="Random seed for reproducible rarefaction (default: random).")
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="Input total count table CSV (species level).",
+    )
+    parser.add_argument("-o", "--output", required=True, help="Output directory path.")
+    parser.add_argument(
+        "-d",
+        "--depth",
+        type=int,
+        default=1000,
+        help="Rarefaction depth for β diversity (default: 1000).",
+    )
+    parser.add_argument(
+        "-s",
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for reproducible rarefaction (default: random).",
+    )
     args = parser.parse_args()
 
     input_file = Path(args.input)
diff --git a/krakenparser/stats/relabund.py b/krakenparser/stats/relabund.py
index 171f280..5b1b0c5 100644
--- a/krakenparser/stats/relabund.py
+++ b/krakenparser/stats/relabund.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 
+import argparse
 import logging
 import warnings
-import argparse
 from pathlib import Path
+
 import pandas as pd
 
 _log = logging.getLogger(__name__)
@@ -59,9 +60,7 @@ def calculate_rel_abund(input_file, output_file, other_threshold=None):
 
     # Save to CSV
     result.to_csv(output_file, index=False)
-    _log.info(
-        "Relative abundance saved as '%s'.", output_file
-    )
+    _log.info("Relative abundance saved as '%s'.", output_file)
 
 
 if __name__ == "__main__":
diff --git a/tests/conftest.py b/tests/conftest.py
index 8100a3e..f681d4f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,10 @@
 import matplotlib
+
 matplotlib.use("Agg")
 
 import pandas as pd
 import pytest
 
-
 SAMPLE_KREPORT = (
     "99.98\t999980\t0\tR\t1\troot\n"
     "99.98\t999980\t0\tD\t2\t  Bacteria\n"
@@ -43,12 +43,14 @@ def counts_txt_file(tmp_path):
 
 @pytest.fixture
 def counts_csv_file(tmp_path):
-    df = pd.DataFrame({
-        "Sample_id": ["S1", "S2"],
-        "Pseudomonas aeruginosa": [300000, 100000],
-        "Escherichia coli": [200000, 50000],
-        "Bacteroides fragilis": [100000, 200000],
-    })
+    df = pd.DataFrame(
+        {
+            "Sample_id": ["S1", "S2"],
+            "Pseudomonas aeruginosa": [300000, 100000],
+            "Escherichia coli": [200000, 50000],
+            "Bacteroides fragilis": [100000, 200000],
+        }
+    )
     f = tmp_path / "counts_species.csv"
     df.to_csv(f, index=False)
     return f
@@ -56,11 +58,17 @@ def counts_csv_file(tmp_path):
 
 @pytest.fixture
 def relabund_df():
-    return pd.DataFrame({
-        "Sample_id": ["S1", "S1", "S1", "S2", "S2", "S2"],
-        "taxon": [
-            "Pseudomonadota", "Bacillota", "Other (<4.0%)",
-            "Pseudomonadota", "Bacillota", "Other (<4.0%)",
-        ],
-        "rel_abund_perc": [70.0, 20.0, 10.0, 50.0, 35.0, 15.0],
-    })
+    return pd.DataFrame(
+        {
+            "Sample_id": ["S1", "S1", "S1", "S2", "S2", "S2"],
+            "taxon": [
+                "Pseudomonadota",
+                "Bacillota",
+                "Other (<4.0%)",
+                "Pseudomonadota",
+                "Bacillota",
+                "Other (<4.0%)",
+            ],
+            "rel_abund_perc": [70.0, 20.0, 10.0, 50.0, 35.0, 15.0],
+        }
+    )
diff --git a/tests/test_full_pipeline.py b/tests/test_full_pipeline.py
index 599f625..efa6854 100644
--- a/tests/test_full_pipeline.py
+++ b/tests/test_full_pipeline.py
@@ -1,8 +1,9 @@
-import zipfile
 import shutil
-import pytest
+import zipfile
 from pathlib import Path
 
+import pytest
+
 from krakenparser.pipeline import run_pipeline
 
 
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 7fed2fb..35913d3 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -6,13 +6,12 @@
 import pandas as pd
 import pytest
 
-from krakenparser.mpa.transform2mpa import kreport_to_mpa
 from krakenparser.counts.convert2csv import convert_to_csv
 from krakenparser.counts.processing_script import process_files
 from krakenparser.counts.split_mpa import split_mpa
-from krakenparser.stats.relabund import calculate_rel_abund
+from krakenparser.mpa.transform2mpa import kreport_to_mpa
 from krakenparser.stats.diversity import calc_alpha_div, calc_beta_div
-
+from krakenparser.stats.relabund import calculate_rel_abund
 
 SAMPLE_COMBINED_MPA = (
     "#Classification\tsample1\tsample2\n"
@@ -37,6 +36,7 @@ def combined_mpa_file(tmp_path):
 # Helpers
 # ---------------------------------------------------------------------------
 
+
 def _sha256(path) -> str:
     return hashlib.sha256(path.read_bytes()).hexdigest()
 
@@ -45,6 +45,7 @@ def _sha256(path) -> str:
 # kreport_to_mpa
 # ---------------------------------------------------------------------------
 
+
 def test_kreport_to_mpa_reproducible(kreport_file, tmp_path):
     counter = itertools.count()
 
@@ -99,6 +100,7 @@ def test_kreport_to_mpa_paths_are_hierarchical(kreport_file, tmp_path):
 # convert_to_csv
 # ---------------------------------------------------------------------------
 
+
 def test_convert_to_csv_reproducible(counts_txt_file, tmp_path):
     counter = itertools.count()
 
@@ -124,6 +126,7 @@ def test_convert_to_csv_transposes_correctly(counts_txt_file, tmp_path):
 # process_files
 # ---------------------------------------------------------------------------
 
+
 def test_process_files_adds_header_and_cleans_names(tmp_path):
     source = tmp_path / "COMBINED.txt"
     source.write_text(
@@ -132,8 +135,7 @@ def test_process_files_adds_header_and_cleans_names(tmp_path):
     )
     dest = tmp_path / "counts_species.txt"
     dest.write_text(
-        "s__Pseudomonas_aeruginosa\t300\t100\n"
-        "s__Escherichia_coli\t200\t50\n"
+        "s__Pseudomonas_aeruginosa\t300\t100\ns__Escherichia_coli\t200\t50\n"
     )
     process_files(str(source), str(dest))
     result = dest.read_text()
@@ -151,7 +153,9 @@ def test_process_files_reproducible(tmp_path):
         dest = tmp_path / f"counts_{i}.txt"
         dest.write_text("s__Some_species\t10\n")
         process_files(str(source), str(dest))
-    assert (tmp_path / "counts_0.txt").read_text() == (tmp_path / "counts_1.txt").read_text()
+    assert (tmp_path / "counts_0.txt").read_text() == (
+        tmp_path / "counts_1.txt"
+    ).read_text()
 
 
 def test_process_files_missing_source_raises(tmp_path):
@@ -177,6 +181,7 @@ def test_convert_to_csv_missing_input_raises(tmp_path):
 # calculate_rel_abund
 # ---------------------------------------------------------------------------
 
+
 def test_relabund_reproducible(counts_csv_file, tmp_path):
     counter = itertools.count()
 
@@ -220,6 +225,7 @@ def test_relabund_missing_input_raises(tmp_path):
 # calc_alpha_div
 # ---------------------------------------------------------------------------
 
+
 def test_alpha_div_reproducible(counts_csv_file, tmp_path):
     df = pd.read_csv(counts_csv_file, index_col=0)
     counter = itertools.count()
@@ -256,6 +262,7 @@ def test_alpha_div_shannon_non_negative(counts_csv_file, tmp_path):
 # calc_beta_div
 # ---------------------------------------------------------------------------
 
+
 def test_beta_div_output_files_exist(counts_csv_file, tmp_path):
     df = pd.read_csv(counts_csv_file, index_col=0)
     out_dir = tmp_path / "diversity"
@@ -281,13 +288,12 @@ def test_beta_div_diagonal_is_zero(counts_csv_file, tmp_path):
     calc_beta_div(df, out_dir, rarefaction_depth=1000)
     bray = pd.read_csv(out_dir / "beta_div_bray.csv", index_col=0)
     import numpy as np
+
     assert np.allclose(np.diag(bray.values), 0.0)
 
 
 def test_beta_div_too_few_samples_raises(tmp_path):
-    df = pd.DataFrame(
-        {"Taxon_A": [100], "Taxon_B": [200]}, index=["S1"]
-    )
+    df = pd.DataFrame({"Taxon_A": [100], "Taxon_B": [200]}, index=["S1"])
     out_dir = tmp_path / "diversity"
     out_dir.mkdir()
     with pytest.raises(ValueError, match="rarefaction"):
@@ -298,6 +304,7 @@ def test_beta_div_too_few_samples_raises(tmp_path):
 # split_mpa
 # ---------------------------------------------------------------------------
 
+
 def test_split_mpa_creates_all_rank_files(combined_mpa_file, tmp_path):
     split_mpa(str(combined_mpa_file), str(tmp_path))
     for rank in ("species", "genus", "family", "order", "class", "phylum"):
diff --git a/tests/test_kpplot.py b/tests/test_kpplot.py
index 51cb5e1..a85f74d 100644
--- a/tests/test_kpplot.py
+++ b/tests/test_kpplot.py
@@ -2,16 +2,16 @@
 
 import pytest
 
+from krakenparser.kpplot.base import KpPlotBase, aggregate_by_metadata
+from krakenparser.kpplot.clustermap import clustermap
 from krakenparser.kpplot.stackedbar import stacked_barplot
 from krakenparser.kpplot.streamgraph import streamgraph
-from krakenparser.kpplot.clustermap import clustermap
-from krakenparser.kpplot.base import KpPlotBase, aggregate_by_metadata
-
 
 # ---------------------------------------------------------------------------
 # Smoke tests — verify each plot function returns without error
 # ---------------------------------------------------------------------------
 
+
 def test_stackedbar_returns_kpplotbase(relabund_df):
     result = stacked_barplot(relabund_df)
     assert isinstance(result, KpPlotBase)
@@ -31,6 +31,7 @@ def test_clustermap_returns_kpplotbase(relabund_df):
 # sample_order validation
 # ---------------------------------------------------------------------------
 
+
 def test_stackedbar_sample_order_missing_raises(relabund_df):
     with pytest.raises(ValueError, match="Samples missing"):
         stacked_barplot(relabund_df, sample_order=["S1", "S2", "GHOST"])
@@ -50,6 +51,7 @@ def test_clustermap_sample_order_missing_raises(relabund_df):
 # cmap validation (stackedbar / streamgraph)
 # ---------------------------------------------------------------------------
 
+
 def test_stackedbar_cmap_too_short_raises(relabund_df):
     with pytest.raises(ValueError, match="cmap"):
         stacked_barplot(relabund_df, cmap=["red"])
@@ -74,13 +76,16 @@ def test_streamgraph_cmap_invalid_type_raises(relabund_df):
 # aggregate_by_metadata
 # ---------------------------------------------------------------------------
 
+
 def test_aggregate_by_metadata_basic(relabund_df):
     import pandas as pd
 
-    metadata = pd.DataFrame({
-        "Sample_id": ["S1", "S2"],
-        "Group": ["A", "A"],
-    })
+    metadata = pd.DataFrame(
+        {
+            "Sample_id": ["S1", "S2"],
+            "Group": ["A", "A"],
+        }
+    )
     result = aggregate_by_metadata(relabund_df, metadata, "Group")
     assert "Sample_id" in result.columns
     assert set(result["Sample_id"]) == {"A"}
diff --git a/tests/test_units.py b/tests/test_units.py
index 57618de..39435d3 100644
--- a/tests/test_units.py
+++ b/tests/test_units.py
@@ -2,23 +2,22 @@
 
 import math
 
-import numpy as np
 import pytest
 
+from krakenparser.counts.processing_script import modify_taxa_names
 from krakenparser.mpa.transform2mpa import _parse_line
 from krakenparser.stats.diversity import chao1_index, pielou_evenness, shannon_index
-from krakenparser.counts.processing_script import modify_taxa_names
-
 
 # ---------------------------------------------------------------------------
 # _parse_line
 # ---------------------------------------------------------------------------
 
+
 def test_parse_line_standard_rank():
     line = "50.00\t500000\t100000\tP\t1224\t    Pseudomonadota\n"
     name, depth, rank, cum_reads, pct = _parse_line(line)
     assert name == "Pseudomonadota"
-    assert depth == 2       # 4 leading spaces // 2
+    assert depth == 2  # 4 leading spaces // 2
     assert rank == "P"
     assert cum_reads == 500000
     assert pct == 50.0
@@ -37,7 +36,7 @@ def test_parse_line_intermediate_rank():
     name, depth, rank, _, _ = _parse_line(line)
     assert name == "Some subspecies"
     assert rank == "S1"
-    assert depth == 5       # 10 spaces // 2
+    assert depth == 5  # 10 spaces // 2
 
 
 def test_parse_line_too_few_columns():
@@ -56,6 +55,7 @@ def test_parse_line_non_numeric_reads():
 # shannon_index
 # ---------------------------------------------------------------------------
 
+
 def test_shannon_uniform_four_species():
     assert abs(shannon_index([1, 1, 1, 1]) - math.log(4)) < 1e-10
 
@@ -76,6 +76,7 @@ def test_shannon_two_equal_species():
 # pielou_evenness
 # ---------------------------------------------------------------------------
 
+
 def test_pielou_single_species_returns_nan():
     assert math.isnan(pielou_evenness([100]))
 
@@ -97,6 +98,7 @@ def test_pielou_range_zero_to_one():
 # chao1_index
 # ---------------------------------------------------------------------------
 
+
 def test_chao1_f2_zero_uses_f1_formula():
     # F1=3, F2=0 → S_obs + F1*(F1-1)/2
     counts = [1, 1, 1, 5, 10]  # F1=3, F2=0, S_obs=5
@@ -121,6 +123,7 @@ def test_chao1_no_singletons():
 # modify_taxa_names
 # ---------------------------------------------------------------------------
 
+
 def test_modify_taxa_names_strips_prefix_and_replaces_underscores():
     assert modify_taxa_names("s__Homo_sapiens\t100\t200") == "Homo sapiens\t100\t200"
 

From 8e3c0000c0d0510fdbe24f6e27938d347600b2ee Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 14:46:11 +0200
Subject: [PATCH 2/6] chore: bump version to 1.0.1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7af15d5..4ae9bca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "krakenparser"
-version = "1.0.0"
+version = "1.0.1"
 description = "A collection of scripts designed to process Kraken2 reports and convert them into CSV format."
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}

From 34616e59350e2c05d5f14b96c3a168b6632e9c31 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 15:02:15 +0200
Subject: [PATCH 3/6] ci: replace flake8 with Ruff linter and formatter

---
 .github/workflows/python-package.yml | 55 +++++++++++++---------------
 1 file changed, 25 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 553faa6..7a993e1 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -1,46 +1,41 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
 name: Python package
-
 on:
   push:
     branches: [ "dev", "main" ]
   pull_request:
     branches: [ "dev", "main" ]
-
 jobs:
   build:
-
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
-
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install flake8 setuptools wheel
-        pip install -e ".[dev]" --no-build-isolation
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        pytest --cov=krakenparser --cov-report=xml
-    - name: Upload coverage to Codecov
-      if: matrix.python-version == '3.12'
-      uses: codecov/codecov-action@v5
-      with:
-        files: coverage.xml
-        token: ${{ secrets.CODECOV_TOKEN }}
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install ruff setuptools wheel
+          pip install -e ".[dev]" --no-build-isolation
+      - name: Lint with Ruff
+        run: |
+          ruff check .
+      - name: Format check with Ruff
+        run: |
+          ruff format --check .
+      - name: Test with pytest
+        run: |
+          pytest --cov=krakenparser --cov-report=xml
+      - name: Upload coverage to Codecov
+        if: matrix.python-version == '3.12'
+        uses: codecov/codecov-action@v5
+        with:
+          files: coverage.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
\ No newline at end of file

From 3f6f0b47de7248b2b84217a339cda153723ff2bd Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 15:02:24 +0200
Subject: [PATCH 4/6] ci: add codecov.yml with informational patch coverage

---
 codecov.yml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 codecov.yml

diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..bc81794
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,6 @@
+# codecov.yml
+coverage:
+  patch:
+    target: 78%   # снизить порог
+    # или:
+    informational: true  # не фейлить, только информировать
\ No newline at end of file

From bc0f1e579f7c061681ad3bb43024714f11ea9710 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 15:05:11 +0200
Subject: [PATCH 5/6] style(ci): remove comments from codecov.yml

---
 codecov.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/codecov.yml b/codecov.yml
index bc81794..4200a1c 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,6 +1,4 @@
-# codecov.yml
 coverage:
   patch:
-    target: 78%   # снизить порог
-    # или:
-    informational: true  # не фейлить, только информировать
\ No newline at end of file
+    target: 78%
+    informational: true
\ No newline at end of file

From 4dc34dc4289b34ad0eebe82ddaeb6e8f2c224507 Mon Sep 17 00:00:00 2001
From: Ilia Popov <iljapopov17@gmail.com>
Date: Mon, 18 May 2026 15:05:52 +0200
Subject: [PATCH 6/6] revert: "ci: replace flake8 with Ruff linter and formatter"

---
 .github/workflows/python-package.yml | 55 +++++++++++++++-------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 7a993e1..553faa6 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -1,41 +1,46 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
 name: Python package
+
 on:
   push:
     branches: [ "dev", "main" ]
   pull_request:
     branches: [ "dev", "main" ]
+
 jobs:
   build:
+
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+
     steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install ruff setuptools wheel
-          pip install -e ".[dev]" --no-build-isolation
-      - name: Lint with Ruff
-        run: |
-          ruff check .
-      - name: Format check with Ruff
-        run: |
-          ruff format --check .
-      - name: Test with pytest
-        run: |
-          pytest --cov=krakenparser --cov-report=xml
-      - name: Upload coverage to Codecov
-        if: matrix.python-version == '3.12'
-        uses: codecov/codecov-action@v5
-        with:
-          files: coverage.xml
-          token: ${{ secrets.CODECOV_TOKEN }}
\ No newline at end of file
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 setuptools wheel
+        pip install -e ".[dev]" --no-build-isolation
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest --cov=krakenparser --cov-report=xml
+    - name: Upload coverage to Codecov
+      if: matrix.python-version == '3.12'
+      uses: codecov/codecov-action@v5
+      with:
+        files: coverage.xml
+        token: ${{ secrets.CODECOV_TOKEN }}