Skip to content

Index

papertrail

papertrail - retrieve author publications and compute bibliometric metrics.

Example

from papertrail import AuthorProfile

profile = AuthorProfile("Marie Curie").fetch()
metrics = profile.metrics()
print(metrics.h_index)

AuthorProfile

Retrieve and analyse all publications of a single author.

AuthorProfile is the primary API surface of papertrail. It combines a `papertrail.fetchers.base.BaseFetcher` for data retrieval with optional `papertrail.metrics.impact_factor.ImpactFactorDatabase` enrichment and a set of export helpers.

Parameters:

Name Type Description Default
name str

Author name (full name or last-name prefix).

required
fetcher BaseFetcher | None

Custom fetcher instance. Defaults to :class:~papertrail.fetchers.openalex.OpenAlexFetcher.

None
email str | None

E-mail passed to the default OpenAlex fetcher to enable the polite pool. Ignored when fetcher is provided explicitly.

None
Example

profile = AuthorProfile("Marie Curie").fetch()
m = profile.metrics()
print(m.h_index)

Source code in src/papertrail/author.py
class AuthorProfile:
    """Retrieve and analyse all publications of a single author.

    ``AuthorProfile`` is the primary API surface of *papertrail*.  It combines
    a :class:`~papertrail.fetchers.base.BaseFetcher` for data retrieval with
    optional :class:`~papertrail.metrics.impact_factor.ImpactFactorDatabase`
    enrichment and a set of export helpers.

    Args:
        name: Author name (full name or last-name prefix).
        fetcher: Custom fetcher instance.  Defaults to
            :class:`~papertrail.fetchers.openalex.OpenAlexFetcher`.
        email: E-mail passed to the default OpenAlex fetcher to enable the
            *polite pool*.  Ignored when *fetcher* is provided explicitly.
        enable_user_data: Whether every :meth:`fetch` result is saved through
            a ``LocalMetricsCache``.  Treated as ``True`` when neither this
            flag nor *enable_local_cache* is given.
        enable_local_cache: Alternative spelling of *enable_user_data*.  When
            both are given they must carry the same value.
        user_data_path: Location handed to ``LocalMetricsCache``.  When no
            path is given, ``None`` is passed through to it.
        cache_path: Alternative spelling of *user_data_path*.  Only one of
            the two may be given.

    Raises:
        ValueError: If *enable_user_data* and *enable_local_cache* disagree,
            or if both *user_data_path* and *cache_path* are given.

    Example:
        >>> profile = AuthorProfile("Marie Curie").fetch()
        >>> m = profile.metrics()
        >>> print(m.h_index)
    """

    def __init__(
        self,
        name: str,
        *,
        fetcher: BaseFetcher | None = None,
        email: str | None = None,
        enable_local_cache: bool | None = None,
        cache_path: str | Path | None = None,
        enable_user_data: bool | None = None,
        user_data_path: str | Path | None = None,
    ) -> None:
        self.name = name
        # Test against None rather than truthiness so that a custom fetcher
        # which happens to be falsy is not silently replaced by the default.
        self._fetcher: BaseFetcher = (
            fetcher if fetcher is not None else OpenAlexFetcher(email=email)
        )
        self._publications: list[Publication] = []
        self._author_info: AuthorInfo | None = None
        self._if_database: ImpactFactorDatabase | None = None
        self._source_analysis: dict[str, Any] | None = None
        self._cached_metrics: AuthorMetrics | None = None

        # ``enable_user_data`` and ``enable_local_cache`` are two names for
        # the same switch; reject contradictory values instead of guessing.
        if (
            enable_user_data is not None
            and enable_local_cache is not None
            and enable_user_data != enable_local_cache
        ):
            raise ValueError(
                "Received conflicting values for enable_user_data and enable_local_cache."
            )
        resolved_enable_user_data = (
            enable_user_data
            if enable_user_data is not None
            else (enable_local_cache if enable_local_cache is not None else True)
        )

        # Same idea for the two path spellings: at most one may be supplied.
        if cache_path is not None and user_data_path is not None:
            raise ValueError(
                "Use only one of user_data_path or cache_path, not both."
            )
        resolved_path = user_data_path if user_data_path is not None else cache_path
        resolved_cache_path = Path(resolved_path) if resolved_path is not None else None

        self._local_cache = (
            LocalMetricsCache(resolved_cache_path)
            if resolved_enable_user_data
            else None
        )

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def publications(self) -> list[Publication]:
        """All retrieved publications (empty until :meth:`fetch` is called)."""
        return self._publications

    @property
    def author_info(self) -> AuthorInfo | None:
        """Resolved author metadata, or ``None`` if not yet fetched."""
        return self._author_info

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------

    def use_impact_factor_database(self, db: ImpactFactorDatabase) -> AuthorProfile:
        """Attach a custom impact factor database.

        If publications have already been fetched, they are enriched
        immediately.  Otherwise, enrichment happens automatically during
        :meth:`fetch`.

        Args:
            db: A pre-loaded
                :class:`~papertrail.metrics.impact_factor.ImpactFactorDatabase`.

        Returns:
            ``self`` for method chaining.
        """
        self._if_database = db
        if self._publications:
            self._publications = db.enrich_publications(self._publications)
            # Enrichment changed the publication list, so previously computed
            # metrics must be rebuilt on the next :meth:`metrics` call.
            self._cached_metrics = None
        return self

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------

    def search_candidates(self) -> list[AuthorInfo]:
        """Return candidates matching :attr:`name` without fetching publications.

        Useful for disambiguating common names before committing to a specific
        author ID.

        Returns:
            A list of :class:`~papertrail.models.AuthorInfo` objects.

        Raises:
            FetchError: If the API request fails.
        """
        return self._fetcher.search_authors(self.name)

    def fetch(
        self,
        author_id: str | None = None,
        *,
        max_results: int | None = None,
    ) -> AuthorProfile:
        """Fetch publications for this author.

        When several authors match :attr:`name` and no *author_id* is given,
        the first candidate returned by the fetcher is used; this method does
        not raise for ambiguous names.

        Args:
            author_id: Explicit author identifier (e.g. OpenAlex author ID
                URL).  When ``None``, the best-ranked search result for
                :attr:`name` is used automatically.
            max_results: Cap the number of returned publications.  ``None``
                fetches all available works.

        Returns:
            ``self`` for method chaining.

        Raises:
            AuthorNotFoundError: If no author matches the name.
            FetchError: If an API request fails.

        Example:
            >>> profile = AuthorProfile("Ada Lovelace").fetch()
            >>> len(profile.publications) > 0
            True
        """
        if author_id is None:
            candidates = self._fetcher.search_authors(self.name)
            if not candidates:
                raise AuthorNotFoundError(f"No author found matching '{self.name}'.")
            self._author_info = candidates[0]
            author_id = candidates[0].id or ""
        elif self._author_info is None or self._author_info.id != author_id:
            # Build minimal metadata for the explicit ID.  Metadata left over
            # from an earlier fetch is kept only when it describes the same
            # author; otherwise the metrics and the cache entry would be
            # labelled with the previous author's identifiers.
            self._author_info = AuthorInfo(id=author_id, name=self.name)

        self._publications = self._fetcher.fetch_publications(
            author_id, max_results=max_results
        )

        if self._if_database is not None:
            self._publications = self._if_database.enrich_publications(
                self._publications
            )

        # A fetch always refreshes the source analysis and the metrics so
        # that they describe the publication list that was just retrieved.
        self._source_analysis = self._fetcher.fetch_analyze_metrics(self._publications)
        self._cached_metrics = self._compute_metrics()

        if self._local_cache is not None:
            self._local_cache.save_fetch(
                author_name=self.name,
                author_info=self._author_info,
                publications=self._publications,
                metrics=self._cached_metrics,
                fetcher_name=type(self._fetcher).__name__,
            )

        return self

    # ------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------

    def _compute_metrics(self) -> AuthorMetrics:
        """Run ``compute_metrics`` on the current publications and metadata."""
        info = self._author_info
        return compute_metrics(
            author_name=self.name,
            publications=self._publications,
            openalex_id=info.id if info else None,
            orcid=info.orcid if info else None,
            source_analysis=self._source_analysis,
        )

    def metrics(self) -> AuthorMetrics:
        """Compute bibliometric metrics from the fetched publications.

        The result is cached; it is recomputed only after the cache has been
        reset (for example by :meth:`use_impact_factor_database`).

        Returns:
            An :class:`~papertrail.models.AuthorMetrics` instance.

        Raises:
            RuntimeError: If :meth:`fetch` has not been called yet.
        """
        # An author with zero publications is still a valid fetched state,
        # hence the additional check on ``_author_info``.
        if not self._publications and self._author_info is None:
            raise RuntimeError(
                "No publications loaded.  Call fetch() before metrics()."
            )

        if self._cached_metrics is not None:
            return self._cached_metrics

        if self._source_analysis is None:
            self._source_analysis = self._fetcher.fetch_analyze_metrics(
                self._publications
            )
        self._cached_metrics = self._compute_metrics()
        return self._cached_metrics

    # ------------------------------------------------------------------
    # Exports
    # ------------------------------------------------------------------

    def export_bibtex(self, path: str | Path) -> None:
        """Export publications to a BibTeX ``.bib`` file.

        Args:
            path: Destination file path.

        Raises:
            ExportError: If the file cannot be written.
        """
        bibtex_mod.export_bibtex(self._publications, Path(path))

    def export_publications(
        self,
        path: str | Path,
        *,
        fmt: ExportFormat = "json",
    ) -> None:
        """Export the publication list to a file.

        Args:
            path: Destination file path.
            fmt: Output format - ``"json"`` (default) or ``"csv"``.

        Raises:
            ValueError: If *fmt* is not supported.
            ExportError: If the file cannot be written.
        """
        dest = Path(path)
        if fmt == "json":
            json_exporter.export_publications_json(self._publications, dest)
        elif fmt == "csv":
            csv_exporter.export_publications_csv(self._publications, dest)
        else:
            raise ValueError(f"Unsupported export format: {fmt!r}")

    def export_metrics(
        self,
        path: str | Path,
        *,
        fmt: ExportFormat = "json",
    ) -> None:
        """Compute and export metrics to a file.

        Args:
            path: Destination file path.
            fmt: Output format - ``"json"`` (default) or ``"csv"``.

        Raises:
            RuntimeError: If :meth:`fetch` has not been called yet.
            ValueError: If *fmt* is not supported.
            ExportError: If the file cannot be written.
        """
        m = self.metrics()
        dest = Path(path)
        if fmt == "json":
            json_exporter.export_metrics_json(m, dest)
        elif fmt == "csv":
            csv_exporter.export_metrics_csv(m, dest)
        else:
            raise ValueError(f"Unsupported export format: {fmt!r}")

    def dashboard(self) -> object:
        """Build the default interactive Bokeh dashboard for this profile.

        Returns:
            A Bokeh layout containing the available plots.

        Raises:
            RuntimeError: If :meth:`fetch` has not been called yet.
        """
        return build_author_dashboard(self.metrics())

    def export_dashboard(
        self,
        path: str | Path,
        *,
        fmt: PlotFormat = "html",
    ) -> None:
        """Export the default interactive dashboard.

        Args:
            path: Destination file path.
            fmt: Output format - ``"html"`` for standalone interactive output
                or ``"json"`` for embeddable Bokeh JSON. Static exports are
                also supported with ``"png"`` and ``"pdf"``.

        Raises:
            RuntimeError: If :meth:`fetch` has not been called yet.
        """
        export_dashboard(self.metrics(), Path(path), fmt=fmt)

    # ------------------------------------------------------------------
    # Dunder
    # ------------------------------------------------------------------

    def __repr__(self) -> str:
        n_pubs = len(self._publications)
        return f"AuthorProfile(name={self.name!r}, publications={n_pubs})"

publications property

publications: list[Publication]

All retrieved publications (empty until :meth:fetch is called).

author_info property

author_info: AuthorInfo | None

Resolved author metadata, or None if not yet fetched.

use_impact_factor_database

use_impact_factor_database(
    db: ImpactFactorDatabase,
) -> AuthorProfile

Attach a custom impact factor database.

If publications have already been fetched, they are enriched immediately. Otherwise, enrichment happens automatically during :meth:fetch.

Parameters:

Name Type Description Default
db ImpactFactorDatabase

A pre-loaded :class:~papertrail.metrics.impact_factor.ImpactFactorDatabase.

required

Returns:

Type Description
AuthorProfile

self for method chaining.

Source code in src/papertrail/author.py
def use_impact_factor_database(self, db: ImpactFactorDatabase) -> AuthorProfile:
    """Register *db* as the impact factor source for this profile.

    Publications that are already loaded are enriched right away and the
    cached metrics are dropped.  With no publications loaded yet, the
    database is only stored; :meth:`fetch` applies it later.

    Args:
        db: A pre-loaded
            :class:`~papertrail.metrics.impact_factor.ImpactFactorDatabase`.

    Returns:
        ``self`` for method chaining.
    """
    self._if_database = db
    if not self._publications:
        # Nothing to enrich yet.
        return self

    enriched = db.enrich_publications(self._publications)
    self._publications = enriched
    # Metrics computed before enrichment no longer match the publications.
    self._cached_metrics = None
    return self

search_candidates

search_candidates() -> list[AuthorInfo]

Return candidates matching :attr:name without fetching publications.

Useful for disambiguating common names before committing to a specific author ID.

Returns:

Type Description
list[AuthorInfo]

A list of :class:~papertrail.models.AuthorInfo objects.

Raises:

Type Description
FetchError

If the API request fails.

Source code in src/papertrail/author.py
def search_candidates(self) -> list[AuthorInfo]:
    """List the authors the fetcher finds for :attr:`name`.

    No publications are retrieved.  Handy for picking the right author ID
    when a name is shared by several people.

    Returns:
        A list of :class:`~papertrail.models.AuthorInfo` objects.

    Raises:
        FetchError: If the API request fails.
    """
    query = self.name
    return self._fetcher.search_authors(query)

fetch

fetch(
    author_id: str | None = None,
    *,
    max_results: int | None = None,
) -> AuthorProfile

Fetch publications for this author.

Parameters:

Name Type Description Default
author_id str | None

Explicit author identifier (e.g. OpenAlex author ID URL). When None, the best-ranked search result for :attr:name is used automatically.

None
max_results int | None

Cap the number of returned publications. None fetches all available works.

None

Returns:

Type Description
AuthorProfile

self for method chaining.

Raises:

Type Description
AuthorNotFoundError

If no author matches the name.

MultipleAuthorsFoundError

Raised only when author_id is None and you explicitly call this in strict mode (not raised by default - the top result is used).

FetchError

If an API request fails.

Example

>>> profile = AuthorProfile("Ada Lovelace").fetch()
>>> len(profile.publications) > 0
True

Source code in src/papertrail/author.py
def fetch(
    self,
    author_id: str | None = None,
    *,
    max_results: int | None = None,
) -> AuthorProfile:
    """Fetch publications for this author.

    When several authors match :attr:`name` and no *author_id* is given,
    the first candidate returned by the fetcher is used; this method does
    not raise for ambiguous names.

    Args:
        author_id: Explicit author identifier (e.g. OpenAlex author ID
            URL).  When ``None``, the best-ranked search result for
            :attr:`name` is used automatically.
        max_results: Cap the number of returned publications.  ``None``
            fetches all available works.

    Returns:
        ``self`` for method chaining.

    Raises:
        AuthorNotFoundError: If no author matches the name.
        FetchError: If an API request fails.

    Example:
        >>> profile = AuthorProfile("Ada Lovelace").fetch()
        >>> len(profile.publications) > 0
        True
    """
    if author_id is None:
        candidates = self._fetcher.search_authors(self.name)
        if not candidates:
            raise AuthorNotFoundError(f"No author found matching '{self.name}'.")
        self._author_info = candidates[0]
        author_id = candidates[0].id or ""
    elif self._author_info is None or self._author_info.id != author_id:
        # Build minimal metadata for the explicit ID.  Metadata left over
        # from an earlier fetch is kept only when it describes the same
        # author; otherwise the metrics and the cache entry would be
        # labelled with the previous author's identifiers.
        self._author_info = AuthorInfo(id=author_id, name=self.name)

    self._publications = self._fetcher.fetch_publications(
        author_id, max_results=max_results
    )

    if self._if_database is not None:
        self._publications = self._if_database.enrich_publications(
            self._publications
        )

    # A fetch always refreshes the source analysis and the metrics so that
    # they describe the publication list that was just retrieved.
    info = self._author_info
    self._source_analysis = self._fetcher.fetch_analyze_metrics(self._publications)
    self._cached_metrics = compute_metrics(
        author_name=self.name,
        publications=self._publications,
        openalex_id=info.id if info else None,
        orcid=info.orcid if info else None,
        source_analysis=self._source_analysis,
    )

    if self._local_cache is not None:
        self._local_cache.save_fetch(
            author_name=self.name,
            author_info=self._author_info,
            publications=self._publications,
            metrics=self._cached_metrics,
            fetcher_name=type(self._fetcher).__name__,
        )

    return self

metrics

metrics() -> AuthorMetrics

Compute bibliometric metrics from the fetched publications.

Returns:

Type Description
AuthorMetrics

An AuthorMetrics instance.

Raises:

Type Description
RuntimeError

If :meth:fetch has not been called yet.

Source code in src/papertrail/author.py
def metrics(self) -> AuthorMetrics:
    """Return the bibliometric metrics for the fetched publications.

    A previously computed result is reused when available; otherwise the
    metrics are computed, stored and returned.

    Returns:
        An :class:`~papertrail.models.AuthorMetrics` instance.

    Raises:
        RuntimeError: If :meth:`fetch` has not been called yet.
    """
    # Neither publications nor author metadata: nothing was ever fetched.
    nothing_loaded = not self._publications and self._author_info is None
    if nothing_loaded:
        raise RuntimeError(
            "No publications loaded.  Call fetch() before metrics()."
        )

    cached = self._cached_metrics
    if cached is not None:
        return cached

    author = self._author_info
    if self._source_analysis is None:
        # Ask the fetcher for its own analysis only when none is stored yet.
        self._source_analysis = self._fetcher.fetch_analyze_metrics(
            self._publications
        )

    fresh = compute_metrics(
        author_name=self.name,
        publications=self._publications,
        openalex_id=author.id if author else None,
        orcid=author.orcid if author else None,
        source_analysis=self._source_analysis,
    )
    self._cached_metrics = fresh
    return fresh

export_bibtex

export_bibtex(path: str | Path) -> None

Export publications to a BibTeX .bib file.

Parameters:

Name Type Description Default
path str | Path

Destination file path.

required

Raises:

Type Description
ExportError

If the file cannot be written.

Source code in src/papertrail/author.py
def export_bibtex(self, path: str | Path) -> None:
    """Write the loaded publications to a BibTeX ``.bib`` file.

    Args:
        path: Destination file path.

    Raises:
        ExportError: If the file cannot be written.
    """
    entries = self._publications
    bibtex_mod.export_bibtex(entries, Path(path))

export_publications

export_publications(
    path: str | Path, *, fmt: ExportFormat = "json"
) -> None

Export the publication list to a file.

Parameters:

Name Type Description Default
path str | Path

Destination file path.

required
fmt ExportFormat

Output format - "json" (default) or "csv".

'json'

Raises:

Type Description
ValueError

If fmt is not supported.

ExportError

If the file cannot be written.

Source code in src/papertrail/author.py
def export_publications(
    self,
    path: str | Path,
    *,
    fmt: ExportFormat = "json",
) -> None:
    """Write the publication list to *path* in the requested format.

    Args:
        path: Destination file path.
        fmt: Output format - ``"json"`` (default) or ``"csv"``.

    Raises:
        ValueError: If *fmt* is not supported.
        ExportError: If the file cannot be written.
    """
    target = Path(path)

    if fmt == "json":
        json_exporter.export_publications_json(self._publications, target)
        return
    if fmt == "csv":
        csv_exporter.export_publications_csv(self._publications, target)
        return

    # Anything else is not a format this method knows how to write.
    raise ValueError(f"Unsupported export format: {fmt!r}")

export_metrics

export_metrics(
    path: str | Path, *, fmt: ExportFormat = "json"
) -> None

Compute and export metrics to a file.

Parameters:

Name Type Description Default
path str | Path

Destination file path.

required
fmt ExportFormat

Output format - "json" (default) or "csv".

'json'

Raises:

Type Description
ValueError

If fmt is not supported.

ExportError

If the file cannot be written.

Source code in src/papertrail/author.py
def export_metrics(
    self,
    path: str | Path,
    *,
    fmt: ExportFormat = "json",
) -> None:
    """Obtain the metrics via :meth:`metrics` and write them to *path*.

    Args:
        path: Destination file path.
        fmt: Output format - ``"json"`` (default) or ``"csv"``.

    Raises:
        ValueError: If *fmt* is not supported.
        ExportError: If the file cannot be written.
    """
    # Metrics are obtained first, so a profile without data fails here
    # before the format is even looked at.
    author_metrics = self.metrics()
    target = Path(path)

    if fmt == "json":
        json_exporter.export_metrics_json(author_metrics, target)
        return
    if fmt == "csv":
        csv_exporter.export_metrics_csv(author_metrics, target)
        return

    raise ValueError(f"Unsupported export format: {fmt!r}")

dashboard

dashboard() -> object

Build the default interactive Bokeh dashboard for this profile.

Returns:

Type Description
object

A Bokeh layout containing the available plots.

Raises:

Type Description
RuntimeError

If :meth:fetch has not been called yet.

Source code in src/papertrail/author.py
def dashboard(self) -> object:
    """Create the default interactive Bokeh dashboard from this profile's metrics.

    Returns:
        A Bokeh layout containing the available plots.

    Raises:
        RuntimeError: If :meth:`fetch` has not been called yet.
    """
    author_metrics = self.metrics()
    return build_author_dashboard(author_metrics)

export_dashboard

export_dashboard(
    path: str | Path, *, fmt: PlotFormat = "html"
) -> None

Export the default interactive dashboard.

Parameters:

Name Type Description Default
path str | Path

Destination file path.

required
fmt PlotFormat

Output format - "html" for standalone interactive output or "json" for embeddable Bokeh JSON. Static exports are also supported with "png" and "pdf".

'html'
Source code in src/papertrail/author.py
def export_dashboard(
    self,
    path: str | Path,
    *,
    fmt: PlotFormat = "html",
) -> None:
    """Write the default interactive dashboard to *path*.

    Args:
        path: Destination file path.
        fmt: Output format - ``"html"`` for standalone interactive output
            or ``"json"`` for embeddable Bokeh JSON. Static exports are
            also supported with ``"png"`` and ``"pdf"``.
    """
    author_metrics = self.metrics()
    export_dashboard(author_metrics, Path(path), fmt=fmt)

AuthorNotFoundError

Bases: PapertrailError

Raised when no author matches the given name or ID.

Source code in src/papertrail/exceptions.py
class AuthorNotFoundError(PapertrailError):
    """Raised when no author matches the given name or ID.

    ``AuthorProfile.fetch`` raises it when the fetcher's author search
    returns no candidates for the requested name.
    """

ExportError

Bases: PapertrailError

Raised when exporting data to a file fails.

Source code in src/papertrail/exceptions.py
class ExportError(PapertrailError):
    """Raised when exporting data to a file fails.

    The ``AuthorProfile`` export methods list it in their ``Raises``
    sections for the case where the destination file cannot be written.
    """

FetchError

Bases: PapertrailError

Raised when an external API request fails.

Source code in src/papertrail/exceptions.py
class FetchError(PapertrailError):
    """Raised when an external API request fails.

    ``ADSFetcher`` also raises it at construction time when no API token
    can be resolved, and wraps any failure of an ADS publication query in it.
    """

ADSFetcher

Bases: BaseFetcher

Fetcher backed by the NASA ADS Search API.

Parameters:

Name Type Description Default
token str | None

ADS API token. If omitted, reads ADS_API_TOKEN from env.

None

Raises:

Type Description
FetchError

If no token is available.

Source code in src/papertrail/fetchers/ads.py
class ADSFetcher(BaseFetcher):
    """Fetcher backed by the NASA ADS Search API.

    Args:
        token: ADS API token. If omitted, reads ``ADS_API_TOKEN`` from env.

    Raises:
        FetchError: If no token is available.
    """

    # ADS fields requested for every record and read back when a record
    # object has to be normalised attribute by attribute.
    _FIELDS = (
        "bibcode",
        "title",
        "author",
        "pub",
        "pubdate",
        "year",
        "doi",
        "citation_count",
        "doctype",
        "property",
    )
    # Number of records requested per ADS page.
    _PAGE_SIZE = 200
    # Page budget used when the caller sets no ``max_results``; iteration
    # still ends as soon as ADS reports that all records were returned.
    _UNBOUNDED_MAX_PAGES = 10_000

    def __init__(self, token: str | None = None) -> None:
        # Pick up a local ``.env`` file without overriding variables that are
        # already set in the process environment.
        load_dotenv(find_dotenv(usecwd=True), override=False)
        resolved_token = token or os.getenv("ADS_API_TOKEN")
        if not resolved_token:
            raise FetchError(
                "NASA ADS token is required. Set ADS_API_TOKEN in your environment "
                "or .env file, or pass a token explicitly."
            )
        self._token = resolved_token
        # Official ADS client uses module-level config.
        ads.config.token = resolved_token

    def search_authors(self, name: str) -> list[AuthorInfo]:
        """Return a single ADS candidate derived from the provided name.

        ADS does not provide a dedicated author-entity endpoint equivalent to
        OpenAlex author search for this package workflow. We therefore return a
        single candidate using the original input string as author query.

        Args:
            name: Author query string; used as both ID and display name.

        Returns:
            A one-element list holding that candidate.
        """
        return [AuthorInfo(id=name, name=name)]

    def fetch_publications(
        self,
        author_id: str,
        *,
        max_results: int | None = None,
    ) -> list[Publication]:
        """Fetch ADS publications for an author query string.

        Args:
            author_id: ADS author query string (e.g. ``"Peresano, M"``).
            max_results: Optional cap on returned publications.  ``None``
                retrieves every matching record; zero or a negative value
                yields an empty list without contacting ADS.

        Returns:
            List of parsed publications, newest first.

        Raises:
            FetchError: If the ADS query fails.
        """
        if max_results is not None and max_results <= 0:
            # The loop below appends before it checks the cap, so a
            # non-positive cap has to be handled up front.
            return []

        if max_results is None:
            rows = self._PAGE_SIZE
            max_pages = self._UNBOUNDED_MAX_PAGES
        else:
            # Do not request more rows than needed, and allow just enough
            # pages to reach the cap (ceiling division).
            rows = min(self._PAGE_SIZE, max_results)
            max_pages = -(-max_results // rows)

        # The author string is embedded in a quoted phrase; escape the
        # characters that would otherwise terminate or corrupt it.
        escaped_author = author_id.replace("\\", "\\\\").replace('"', '\\"')

        publications: list[Publication] = []
        # ``max_pages`` is passed explicitly because the ads client's default
        # page limit would otherwise end iteration after the first page.
        query = ads.SearchQuery(
            q=f'author:"{escaped_author}"',
            fl=list(self._FIELDS),
            rows=rows,
            sort="date desc",
            max_pages=max_pages,
        )

        try:
            for record in query:
                publications.append(self._parse_doc(record))
                if max_results is not None and len(publications) >= max_results:
                    break
        except Exception as exc:
            raise FetchError(
                f"Failed to fetch publications from ADS for author '{author_id}'"
            ) from exc

        return publications

    def fetch_analyze_metrics(
        self,
        publications: list[Publication],
    ) -> dict[str, Any] | None:
        """Fetch ADS native analyze metrics for the fetched publication set.

        Uses ADS Metrics API to retrieve indicator and time-series payloads
        when bibcodes are available.

        Args:
            publications: Publications whose ``id`` values are sent as
                bibcodes; entries with an empty ``id`` are skipped.

        Returns:
            The decoded JSON object, or ``None`` when there are no bibcodes,
            the request fails, or the response is not a JSON object.
        """
        bibcodes = [pub.id for pub in publications if pub.id]
        if not bibcodes:
            return None

        payload = {
            "bibcodes": bibcodes,
            "types": ["indicators", "timeseries", "histograms"],
            "histograms": ["publications", "citations"],
        }

        request = Request(
            "https://api.adsabs.harvard.edu/v1/metrics",
            data=json.dumps(payload).encode("utf-8"),
            headers={
                "Authorization": f"Bearer {self._token}",
                "Content-Type": "application/json",
            },
            method="POST",
        )

        try:
            with urlopen(request, timeout=30) as response:
                raw_payload = response.read().decode("utf-8")
            data = json.loads(raw_payload)
            return data if isinstance(data, dict) else None
        except Exception:
            # Deliberately best-effort: any failure here is reported as
            # "no analysis available" instead of aborting the caller.
            return None

    @staticmethod
    def _parse_doc(doc: object) -> Publication:
        """Parse an ADS document record into a Publication model."""
        data = ADSFetcher._record_to_dict(doc)

        bibcode = str(data.get("bibcode") or "")

        # ADS delivers the title as a list; only the first entry is used.
        title_list = data.get("title")
        title = ""
        if isinstance(title_list, list) and title_list:
            title = str(title_list[0])

        author_list = data.get("author")
        authors: list[AuthorInfo] = []
        if isinstance(author_list, list):
            authors = [AuthorInfo(name=str(a)) for a in author_list]

        year = ADSFetcher._parse_year(data.get("year"), data.get("pubdate"))

        # The DOI may arrive as a list or as a plain string.
        doi_raw = data.get("doi")
        doi: str | None = None
        if isinstance(doi_raw, list) and doi_raw:
            doi = str(doi_raw[0])
        elif isinstance(doi_raw, str):
            doi = doi_raw

        pub_name = data.get("pub")
        journal: JournalInfo | None = None
        if isinstance(pub_name, str) and pub_name:
            journal = JournalInfo(name=pub_name)

        # Flags are compared case-insensitively by upper-casing them once.
        properties = data.get("property")
        property_values: set[str] = set()
        if isinstance(properties, list):
            property_values = {str(p).upper() for p in properties}

        is_refereed = "REFEREED" in property_values
        # The ``property`` flags are already requested, so derive the open
        # access status from them instead of always reporting ``False``.
        # Matches OPENACCESS and the *_OPENACCESS variants.
        is_open_access = any(
            flag.endswith("OPENACCESS") for flag in property_values
        )

        citation_count_raw = data.get("citation_count")
        citation_count = (
            int(citation_count_raw) if isinstance(citation_count_raw, int) else 0
        )

        pub_type = data.get("doctype")
        pub_url = (
            f"https://ui.adsabs.harvard.edu/abs/{bibcode}/abstract" if bibcode else None
        )

        return Publication(
            id=bibcode,
            title=title,
            year=year,
            doi=doi,
            authors=authors,
            journal=journal,
            citation_count=citation_count,
            type=str(pub_type) if pub_type is not None else None,
            open_access=is_open_access,
            url=pub_url,
            refereed=is_refereed,
        )

    @staticmethod
    def _record_to_dict(record: object) -> dict[str, object]:
        """Normalize ADS record objects and plain dicts to a dictionary."""
        if isinstance(record, dict):
            return record
        # ``ads`` returns Article-like objects exposing ``_raw`` and attribute values.
        raw = getattr(record, "_raw", None)
        if isinstance(raw, dict):
            return raw
        # Last resort: copy the known fields attribute by attribute, leaving
        # out the ones that are missing or ``None``.
        result: dict[str, object] = {}
        for field in ADSFetcher._FIELDS:
            value = getattr(record, field, None)
            if value is not None:
                result[field] = value
        return result

    @staticmethod
    def _parse_year(year_raw: object, pubdate_raw: object) -> int:
        """Extract an integer publication year from ADS fields.

        Returns ``0`` when neither field yields a year.
        """
        # ``isdecimal`` rather than ``isdigit``: the latter also accepts
        # characters such as superscript digits that ``int()`` rejects.
        if isinstance(year_raw, str) and year_raw.isdecimal():
            return int(year_raw)
        if isinstance(year_raw, int):
            return year_raw

        if isinstance(pubdate_raw, str):
            # ADS commonly uses YYYY-MM format.
            try:
                return datetime.strptime(pubdate_raw[:7], "%Y-%m").year
            except ValueError:
                # Fall back to a bare four-digit year prefix.
                if len(pubdate_raw) >= 4 and pubdate_raw[:4].isdecimal():
                    return int(pubdate_raw[:4])

        return 0

search_authors

search_authors(name: str) -> list[AuthorInfo]

Return a single ADS candidate derived from the provided name.

ADS does not provide a dedicated author-entity endpoint equivalent to OpenAlex author search for this package workflow. We therefore return a single candidate using the original input string as author query.

Source code in src/papertrail/fetchers/ads.py
def search_authors(self, name: str) -> list[AuthorInfo]:
    """Build the single ADS author candidate for *name*.

    ADS does not provide a dedicated author-entity endpoint equivalent to
    OpenAlex author search for this package workflow, so no lookup is made:
    the input string is used unchanged as both the candidate's ``id`` and
    its ``name``.

    Args:
        name: Author query string as supplied by the caller.

    Returns:
        A one-element list holding that candidate.
    """
    candidate = AuthorInfo(id=name, name=name)
    return [candidate]

fetch_publications

fetch_publications(
    author_id: str, *, max_results: int | None = None
) -> list[Publication]

Fetch ADS publications for an author query string.

Parameters:

Name Type Description Default
author_id str

ADS author query string (e.g. "Peresano, M").

required
max_results int | None

Optional cap on returned publications.

None

Returns:

Type Description
list[Publication]

List of parsed publications.

Source code in src/papertrail/fetchers/ads.py
def fetch_publications(
    self,
    author_id: str,
    *,
    max_results: int | None = None,
) -> list[Publication]:
    """Fetch ADS publications for an author query string.

    Records are requested newest first (``sort="date desc"``) and converted
    with ``self._parse_doc``.

    Args:
        author_id: ADS author query string (e.g. ``"Peresano, M"``).
        max_results: Optional cap on returned publications.  A cap of zero
            or less returns an empty list without querying ADS.

    Returns:
        List of parsed publications.

    Raises:
        FetchError: If querying ADS or parsing a record fails.
    """
    # A non-positive cap can never be satisfied by appending first and
    # checking afterwards, so answer it up front and skip the request.
    if max_results is not None and max_results <= 0:
        return []

    # Never ask for a bigger page than the caller is willing to receive.
    rows = 200 if max_results is None else min(200, max_results)
    publications: list[Publication] = []
    fields = [
        "bibcode",
        "title",
        "author",
        "pub",
        "pubdate",
        "year",
        "doi",
        "citation_count",
        "doctype",
        "property",
    ]
    # The name is embedded in a double-quoted phrase; escape backslashes and
    # quotes so they cannot terminate the phrase or alter the query.
    escaped_author = author_id.replace("\\", "\\\\").replace('"', '\\"')
    # NOTE(review): no ``max_pages`` is passed, so the number of pages fetched
    # is whatever ``ads.SearchQuery`` defaults to - confirm that long
    # publication lists are not truncated.
    query = ads.SearchQuery(
        q=f'author:"{escaped_author}"',
        fl=fields,
        rows=rows,
        sort="date desc",
    )

    try:
        for record in query:
            publications.append(self._parse_doc(record))
            if max_results is not None and len(publications) >= max_results:
                break
    except Exception as exc:
        raise FetchError(
            f"Failed to fetch publications from ADS for author '{author_id}'"
        ) from exc

    return publications

fetch_analyze_metrics

fetch_analyze_metrics(
    publications: list[Publication],
) -> dict[str, Any] | None

Fetch ADS native analyze metrics for the fetched publication set.

Uses ADS Metrics API to retrieve indicator and time-series payloads when bibcodes are available.

Source code in src/papertrail/fetchers/ads.py
def fetch_analyze_metrics(
    self,
    publications: list[Publication],
) -> dict[str, Any] | None:
    """Fetch ADS native analyze metrics for the fetched publication set.

    The non-empty ``id`` values of *publications* are sent as bibcodes in a
    single POST to the ADS Metrics API, asking for the indicator,
    time-series and histogram payloads.

    Args:
        publications: Publications whose ``id`` is used as the bibcode.

    Returns:
        The decoded JSON object, or ``None`` when there are no bibcodes, the
        request or decoding fails, or the response is not a JSON object.
    """
    bibcodes = [pub.id for pub in publications if pub.id]
    if not bibcodes:
        return None

    body = json.dumps(
        {
            "bibcodes": bibcodes,
            "types": ["indicators", "timeseries", "histograms"],
            "histograms": ["publications", "citations"],
        }
    ).encode("utf-8")
    headers = {
        "Authorization": f"Bearer {self._token}",
        "Content-Type": "application/json",
    }
    request = Request(
        "https://api.adsabs.harvard.edu/v1/metrics",
        data=body,
        headers=headers,
        method="POST",
    )

    # Best-effort enrichment: any failure simply means "no native metrics".
    try:
        with urlopen(request, timeout=30) as response:
            decoded = json.loads(response.read().decode("utf-8"))
    except Exception:
        return None

    if isinstance(decoded, dict):
        return decoded
    return None

ImpactFactorDatabase

In-memory store of journal impact factors indexed by ISSN and year.

Example

from pathlib import Path
db = ImpactFactorDatabase()
db.load_csv(Path("jif_data.csv"))
enriched = db.enrich_publications(publications)

Source code in src/papertrail/metrics/impact_factor.py
class ImpactFactorDatabase:
    """In-memory store of journal impact factors indexed by ISSN and year.

    Data is accumulated across :meth:`load_csv` / :meth:`load_json` calls;
    a later value for the same ISSN and year overwrites the earlier one.

    Example:
        >>> from pathlib import Path
        >>> db = ImpactFactorDatabase()
        >>> db.load_csv(Path("jif_data.csv"))
        >>> enriched = db.enrich_publications(publications)
    """

    def __init__(self) -> None:
        # issn -> {year -> impact_factor}
        self._data: dict[str, dict[int, float]] = {}

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_csv(self, path: Path) -> None:
        """Load impact factors from a CSV file.

        The file must contain at minimum the columns ``issn``, ``year``, and
        ``impact_factor``.  Additional columns are silently ignored.
        Surrounding whitespace in the three required cells is stripped.

        Args:
            path: Path to the CSV file (UTF-8, with or without a byte-order
                mark).

        Raises:
            FileNotFoundError: If *path* does not exist.
            KeyError: If a required column is missing.
            ValueError: If a numeric field cannot be parsed.
        """
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise become part of the first header name and make the
        # "issn" column look missing.
        with path.open(newline="", encoding="utf-8-sig") as fh:
            for row in csv.DictReader(fh):
                issn = row["issn"].strip()
                year = int(row["year"].strip())
                value = float(row["impact_factor"].strip())
                self._data.setdefault(issn, {})[year] = value

    def load_json(self, path: Path) -> None:
        """Load impact factors from a JSON file.

        The file must be a JSON object mapping ISSN strings to objects that
        map year strings (or integers) to float values.

        Args:
            path: Path to the JSON file.

        Raises:
            FileNotFoundError: If *path* does not exist.
            json.JSONDecodeError: If the file is not valid JSON.
            ValueError: If a numeric field cannot be parsed.
        """
        raw: dict[str, dict[str, float]] = json.loads(path.read_text(encoding="utf-8"))
        for issn, year_map in raw.items():
            entry = self._data.setdefault(issn, {})
            for year_key, value in year_map.items():
                entry[int(year_key)] = float(value)

    # ------------------------------------------------------------------
    # Querying
    # ------------------------------------------------------------------

    def get_impact_factor(
        self,
        issn: str,
        year: int,
        *,
        tolerance: int = 1,
    ) -> float | None:
        """Return the impact factor for a journal in a given year.

        If an exact match is not found, values within ``±tolerance`` years
        are checked in order of proximity.  At equal distance the earlier
        year is checked before the later one.

        Args:
            issn: ISSN string (e.g. ``"0028-0836"``).
            year: Target year.
            tolerance: How many years to search around *year* when an exact
                match is unavailable.  Defaults to ``1``.

        Returns:
            The impact factor as a float, or ``None`` if no data is available.
        """
        yearly = self._data.get(issn)
        if yearly is None:
            return None
        if year in yearly:
            return yearly[year]
        for delta in range(1, tolerance + 1):
            if (year - delta) in yearly:
                return yearly[year - delta]
            if (year + delta) in yearly:
                return yearly[year + delta]
        return None

    def enrich_publications(
        self,
        publications: list[Publication],
        *,
        tolerance: int = 1,
    ) -> list[Publication]:
        """Return a copy of *publications* enriched with IF data from this database.

        For each publication that has a journal with at least one ISSN, the
        ISSNs are tried in order and the first one with a value wins.  The
        journal's ``impact_factor`` is set to that value and
        ``impact_factor_year`` to the year the value was actually found for,
        which may differ from the publication year when *tolerance* allowed
        a neighbouring year to match.

        Args:
            publications: Original list of publications.
            tolerance: Year tolerance passed to :meth:`get_impact_factor`.

        Returns:
            A new list of publications.  Enriched entries are copies made
            with ``model_copy``; publications without journal data or
            without a match are returned unchanged.
        """
        result: list[Publication] = []
        for pub in publications:
            if pub.journal and pub.journal.issn:
                for issn in pub.journal.issn:
                    if_val = self.get_impact_factor(issn, pub.year, tolerance=tolerance)
                    if if_val is None:
                        continue
                    # Recover which year supplied the value by probing exact
                    # matches in the same order get_impact_factor searches.
                    search_order = [pub.year]
                    for delta in range(1, tolerance + 1):
                        search_order += [pub.year - delta, pub.year + delta]
                    matched_year = next(
                        (
                            candidate
                            for candidate in search_order
                            if self.get_impact_factor(issn, candidate, tolerance=0)
                            is not None
                        ),
                        pub.year,
                    )
                    new_journal = pub.journal.model_copy(
                        update={
                            "impact_factor": if_val,
                            "impact_factor_year": matched_year,
                        }
                    )
                    pub = pub.model_copy(update={"journal": new_journal})
                    break
            result.append(pub)
        return result

load_csv

load_csv(path: Path) -> None

Load impact factors from a CSV file.

The file must contain at minimum the columns issn, year, and impact_factor. Additional columns are silently ignored.

Parameters:

Name Type Description Default
path Path

Path to the CSV file.

required

Raises:

Type Description
FileNotFoundError

If path does not exist.

KeyError

If a required column is missing.

ValueError

If a numeric field cannot be parsed.

Source code in src/papertrail/metrics/impact_factor.py
def load_csv(self, path: Path) -> None:
    """Load impact factors from a CSV file.

    The file must contain at minimum the columns ``issn``, ``year``, and
    ``impact_factor``.  Additional columns are silently ignored.
    Surrounding whitespace in the three required cells is stripped, and a
    later row for the same ISSN and year overwrites the earlier value.

    Args:
        path: Path to the CSV file (UTF-8, with or without a byte-order
            mark).

    Raises:
        FileNotFoundError: If *path* does not exist.
        KeyError: If a required column is missing.
        ValueError: If a numeric field cannot be parsed.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise become part of the first header name and make the "issn"
    # column look missing.
    with path.open(newline="", encoding="utf-8-sig") as fh:
        for row in csv.DictReader(fh):
            issn = row["issn"].strip()
            year = int(row["year"].strip())
            value = float(row["impact_factor"].strip())
            self._data.setdefault(issn, {})[year] = value

load_json

load_json(path: Path) -> None

Load impact factors from a JSON file.

The file must be a JSON object mapping ISSN strings to objects that map year strings (or integers) to float values.

Parameters:

Name Type Description Default
path Path

Path to the JSON file.

required

Raises:

Type Description
FileNotFoundError

If path does not exist.

JSONDecodeError

If the file is not valid JSON.

ValueError

If a numeric field cannot be parsed.

Source code in src/papertrail/metrics/impact_factor.py
def load_json(self, path: Path) -> None:
    """Merge impact factors from a JSON file into the store.

    The file must be a JSON object mapping ISSN strings to objects that
    map year strings (or integers) to float values.  Year keys are
    converted with ``int`` and values with ``float`` before being stored.

    Args:
        path: Path to the JSON file.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If a numeric field cannot be parsed.
    """
    with path.open(encoding="utf-8") as handle:
        payload: dict[str, dict[str, float]] = json.load(handle)

    for issn in payload:
        per_year = self._data.setdefault(issn, {})
        for year_text, factor in payload[issn].items():
            per_year[int(year_text)] = float(factor)

get_impact_factor

get_impact_factor(
    issn: str, year: int, *, tolerance: int = 1
) -> float | None

Return the impact factor for a journal in a given year.

If an exact match is not found, values within ±tolerance years are checked in order of proximity.

Parameters:

Name Type Description Default
issn str

ISSN string (e.g. "0028-0836").

required
year int

Target year.

required
tolerance int

How many years to search around year when an exact match is unavailable. Defaults to 1.

1

Returns:

Type Description
float | None

The impact factor as a float, or None if no data is available.

Source code in src/papertrail/metrics/impact_factor.py
def get_impact_factor(
    self,
    issn: str,
    year: int,
    *,
    tolerance: int = 1,
) -> float | None:
    """Return the impact factor for a journal in a given year.

    If an exact match is not found, values within ``±tolerance`` years
    are checked in order of proximity.

    Args:
        issn: ISSN string (e.g. ``"0028-0836"``).
        year: Target year.
        tolerance: How many years to search around *year* when an exact
            match is unavailable.  Defaults to ``1``.

    Returns:
        The impact factor as a float, or ``None`` if no data is available.
    """
    yearly = self._data.get(issn)
    if yearly is None:
        return None
    if year in yearly:
        return yearly[year]
    for delta in range(1, tolerance + 1):
        if (year - delta) in yearly:
            return yearly[year - delta]
        if (year + delta) in yearly:
            return yearly[year + delta]
    return None

enrich_publications

enrich_publications(
    publications: list[Publication], *, tolerance: int = 1
) -> list[Publication]

Return a copy of publications enriched with IF data from this database.

For each publication that has a journal with at least one ISSN, an IF lookup is performed. If a value is found, the impact_factor and impact_factor_year fields of the publication's journal (papertrail.models.JournalInfo) are updated.

Parameters:

Name Type Description Default
publications list[Publication]

Original list of publications.

required
tolerance int

Year tolerance passed to get_impact_factor().

1

Returns:

Type Description
list[Publication]

A new list of papertrail.models.Publication objects.

list[Publication]

Publications without journal data are returned unchanged.

Source code in src/papertrail/metrics/impact_factor.py
def enrich_publications(
    self,
    publications: list[Publication],
    *,
    tolerance: int = 1,
) -> list[Publication]:
    """Return a copy of *publications* enriched with IF data from this database.

    For each publication that has a journal with at least one ISSN, the
    ISSNs are tried in order and the first one with a value wins.  The
    journal's ``impact_factor`` is set to that value and
    ``impact_factor_year`` to the year the value was actually found for,
    which may differ from the publication year when *tolerance* allowed a
    neighbouring year to match.

    Args:
        publications: Original list of publications.
        tolerance: Year tolerance passed to :meth:`get_impact_factor`.

    Returns:
        A new list of publications.  Enriched entries are copies made with
        ``model_copy``; publications without journal data or without a
        match are returned unchanged.
    """
    result: list[Publication] = []
    for pub in publications:
        if pub.journal and pub.journal.issn:
            for issn in pub.journal.issn:
                if_val = self.get_impact_factor(issn, pub.year, tolerance=tolerance)
                if if_val is None:
                    continue
                # Recover which year supplied the value by probing exact
                # matches in the same order get_impact_factor searches.
                search_order = [pub.year]
                for delta in range(1, tolerance + 1):
                    search_order += [pub.year - delta, pub.year + delta]
                matched_year = next(
                    (
                        candidate
                        for candidate in search_order
                        if self.get_impact_factor(issn, candidate, tolerance=0)
                        is not None
                    ),
                    pub.year,
                )
                new_journal = pub.journal.model_copy(
                    update={
                        "impact_factor": if_val,
                        "impact_factor_year": matched_year,
                    }
                )
                pub = pub.model_copy(update={"journal": new_journal})
                break
        result.append(pub)
    return result

AuthorInfo

Bases: BaseModel

Identifies a single author on a publication.

Attributes:

Name Type Description
id str | None

Unique identifier (e.g. OpenAlex author ID URL).

name str

Full display name.

orcid str | None

ORCID identifier URL, if available.

affiliations list[Affiliation]

Institutional affiliations associated with this authorship.

Source code in src/papertrail/models.py
class AuthorInfo(BaseModel):
    """Identifies a single author on a publication.

    Only ``name`` is required; ``id`` and ``orcid`` default to ``None`` and
    ``affiliations`` to an empty list.

    Attributes:
        id: Unique identifier (e.g. OpenAlex author ID URL).
        name: Full display name.
        orcid: ORCID identifier URL, if available.
        affiliations: Institutional affiliations associated with this authorship.
    """

    id: str | None = None
    name: str
    orcid: str | None = None
    # default_factory: every instance starts with its own empty list.
    affiliations: list[Affiliation] = Field(default_factory=list)

AuthorMetrics

Bases: BaseModel

Aggregated bibliometric metrics for an author.

Attributes:

Name Type Description
author_name str

Display name used to retrieve publications.

openalex_id str | None

OpenAlex author ID URL, if resolved.

orcid str | None

ORCID identifier URL, if available.

total_publications int

Total number of retrieved publications.

total_citations int

Sum of citation counts across all publications.

h_index int

Hirsch index.

i10_index int

Number of publications with at least 10 citations.

average_citations_per_paper float

Mean citations per publication.

most_cited_paper_title str | None

Title of the most-cited publication.

most_cited_paper_citations int

Citation count of the most-cited publication.

publications_per_year dict[int, int]

Mapping of year -> publication count.

citations_per_year dict[int, int]

Mapping of year -> sum of citations for that year's pubs.

publications_refereed_per_year dict[int, int]

Mapping of year -> refereed publication count.

publications_non_refereed_per_year dict[int, int]

Mapping of year -> non-refereed publication count.

publications_refereed_normalized_per_year dict[int, float]

Mapping of year -> refereed publication fraction within that year.

publications_non_refereed_normalized_per_year dict[int, float]

Mapping of year -> non-refereed publication fraction within that year.

citations_refereed_per_year dict[int, int]

Mapping of year -> citations from refereed publications.

citations_non_refereed_per_year dict[int, int]

Mapping of year -> citations from non-refereed publications.

citations_refereed_normalized_per_year dict[int, float]

Mapping of year -> refereed citation fraction within that year.

citations_non_refereed_normalized_per_year dict[int, float]

Mapping of year -> non-refereed citation fraction within that year.

index_timeseries_total dict[str, dict[int, float]]

Mapping of index name -> year -> value.

index_timeseries_refereed dict[str, dict[int, float]]

Mapping of index name -> year -> value.

index_indicators_total dict[str, float]

Mapping of index name -> snapshot value.

index_indicators_refereed dict[str, float]

Mapping of index name -> snapshot value.

publication_types dict[str, int]

Mapping of publication type -> publication count.

journals_per_publication dict[str, int]

Mapping of journal/venue name -> publication count.

citation_distribution dict[str, int]

Mapping of citation bucket -> publication count.

refereed_publications int | None

Count of publications marked as refereed.

non_refereed_publications int | None

Count of publications marked as non-refereed.

avg_impact_factor float | None

Mean impact factor across publications with IF data.

median_impact_factor float | None

Median impact factor across publications with IF data.

Source code in src/papertrail/models.py
class AuthorMetrics(BaseModel):
    """Aggregated bibliometric metrics for an author.

    Only ``author_name`` is required.  Counters default to zero, mappings to
    empty dicts, and the optional values to ``None``.

    Attributes:
        author_name: Display name used to retrieve publications.
        openalex_id: OpenAlex author ID URL, if resolved.
        orcid: ORCID identifier URL, if available.
        total_publications: Total number of retrieved publications.
        total_citations: Sum of citation counts across all publications.
        h_index: Hirsch index.
        i10_index: Number of publications with at least 10 citations.
        average_citations_per_paper: Mean citations per publication.
        most_cited_paper_title: Title of the most-cited publication.
        most_cited_paper_citations: Citation count of the most-cited publication.
        publications_per_year: Mapping of year -> publication count.
        citations_per_year: Mapping of year -> sum of citations for that year's pubs.
        publications_refereed_per_year: Mapping of year -> refereed publication count.
        publications_non_refereed_per_year: Mapping of year -> non-refereed publication count.
        publications_refereed_normalized_per_year: Mapping of year -> refereed
            publication fraction within that year.
        publications_non_refereed_normalized_per_year: Mapping of year ->
            non-refereed publication fraction within that year.
        citations_refereed_per_year: Mapping of year -> citations from refereed
            publications.
        citations_non_refereed_per_year: Mapping of year -> citations from
            non-refereed publications.
        citations_refereed_normalized_per_year: Mapping of year -> refereed
            citation fraction within that year.
        citations_non_refereed_normalized_per_year: Mapping of year ->
            non-refereed citation fraction within that year.
        index_timeseries_total: Mapping of index name -> year -> value.
        index_timeseries_refereed: Mapping of index name -> year -> value.
        index_indicators_total: Mapping of index name -> snapshot value.
        index_indicators_refereed: Mapping of index name -> snapshot value.
        publication_types: Mapping of publication type -> publication count.
        journals_per_publication: Mapping of journal/venue name -> publication count.
        citation_distribution: Mapping of citation bucket -> publication count.
        refereed_publications: Count of publications marked as refereed.
        non_refereed_publications: Count of publications marked as non-refereed.
        avg_impact_factor: Mean impact factor across publications with IF data.
        median_impact_factor: Median impact factor across publications with IF data.
    """

    # --- Author identity ---
    author_name: str
    openalex_id: str | None = None
    orcid: str | None = None

    # --- Headline totals and indices ---
    total_publications: int = 0
    total_citations: int = 0
    h_index: int = 0
    i10_index: int = 0
    average_citations_per_paper: float = 0.0
    most_cited_paper_title: str | None = None
    most_cited_paper_citations: int = 0

    # --- Per-year series (keyed by year) ---
    publications_per_year: dict[int, int] = Field(default_factory=dict)
    citations_per_year: dict[int, int] = Field(default_factory=dict)
    publications_refereed_per_year: dict[int, int] = Field(default_factory=dict)
    publications_non_refereed_per_year: dict[int, int] = Field(default_factory=dict)
    publications_refereed_normalized_per_year: dict[int, float] = Field(
        default_factory=dict
    )
    publications_non_refereed_normalized_per_year: dict[int, float] = Field(
        default_factory=dict
    )
    citations_refereed_per_year: dict[int, int] = Field(default_factory=dict)
    citations_non_refereed_per_year: dict[int, int] = Field(default_factory=dict)
    citations_refereed_normalized_per_year: dict[int, float] = Field(
        default_factory=dict
    )
    citations_non_refereed_normalized_per_year: dict[int, float] = Field(
        default_factory=dict
    )

    # --- Named indices: time series and snapshot values ---
    index_timeseries_total: dict[str, dict[int, float]] = Field(default_factory=dict)
    index_timeseries_refereed: dict[str, dict[int, float]] = Field(
        default_factory=dict
    )
    index_indicators_total: dict[str, float] = Field(default_factory=dict)
    index_indicators_refereed: dict[str, float] = Field(default_factory=dict)

    # --- Categorical breakdowns (keyed by label) ---
    publication_types: dict[str, int] = Field(default_factory=dict)
    journals_per_publication: dict[str, int] = Field(default_factory=dict)
    citation_distribution: dict[str, int] = Field(default_factory=dict)

    # --- Optional values; None when not set ---
    refereed_publications: int | None = None
    non_refereed_publications: int | None = None
    avg_impact_factor: float | None = None
    median_impact_factor: float | None = None

JournalInfo

Bases: BaseModel

Journal or venue metadata.

Attributes:

Name Type Description
id str | None

Unique identifier (e.g. OpenAlex source ID URL).

name str

Full journal/venue name.

issn list[str]

List of ISSN numbers (print and electronic).

publisher str | None

Publisher name.

impact_factor float | None

Impact factor or proxy metric (e.g. OpenAlex 2yr_mean_citedness) at or near the year of publication.

impact_factor_year int | None

Year the impact factor value corresponds to.

Source code in src/papertrail/models.py
class JournalInfo(BaseModel):
    """Journal or venue metadata.

    Only ``name`` is required; ``issn`` defaults to an empty list and every
    other field to ``None``.

    Attributes:
        id: Unique identifier (e.g. OpenAlex source ID URL).
        name: Full journal/venue name.
        issn: List of ISSN numbers (print and electronic).
        publisher: Publisher name.
        impact_factor: Impact factor or proxy metric (e.g. OpenAlex
            ``2yr_mean_citedness``) at or near the year of publication.
        impact_factor_year: Year the impact factor value corresponds to.
    """

    id: str | None = None
    name: str
    # default_factory: every instance starts with its own empty list.
    issn: list[str] = Field(default_factory=list)
    publisher: str | None = None
    # The two impact-factor fields describe one value and the year it is for.
    impact_factor: float | None = None
    impact_factor_year: int | None = None

Publication

Bases: BaseModel

A single scientific publication.

Attributes:

Name Type Description
id str

Unique identifier (e.g. OpenAlex work ID URL).

title str

Publication title.

year int

Publication year.

doi str | None

Digital Object Identifier (without the https://doi.org/ prefix).

authors list[AuthorInfo]

Ordered list of authors.

journal JournalInfo | None

Journal or venue metadata.

citation_count int

Total citations received.

abstract str | None

Plain-text abstract, if available.

type str | None

Publication type string (e.g. "journal-article", "proceedings-article").

refereed bool | None

Whether this record is marked as refereed by the source, when available (not provided by all data sources).

open_access bool

Whether the publication is openly accessible.

url str | None

Landing-page URL for the publication.

Source code in src/papertrail/models.py
class Publication(BaseModel):
    """A single scientific publication.

    ``id``, ``title`` and ``year`` are required; all other fields have
    defaults.

    Attributes:
        id: Unique identifier (e.g. OpenAlex work ID URL).
        title: Publication title.
        year: Publication year.
        doi: Digital Object Identifier (without the ``https://doi.org/`` prefix).
        authors: Ordered list of authors.
        journal: Journal or venue metadata.
        citation_count: Total citations received.
        abstract: Plain-text abstract, if available.
        type: Publication type string (e.g. ``"journal-article"``,
            ``"proceedings-article"``).
        refereed: Whether this record is marked as refereed by the source,
            when available (not provided by all data sources).
        open_access: Whether the publication is openly accessible.
        url: Landing-page URL for the publication.
    """

    # Required fields.
    id: str
    title: str
    year: int
    # Optional fields.
    doi: str | None = None
    authors: list[AuthorInfo] = Field(default_factory=list)
    journal: JournalInfo | None = None
    citation_count: int = 0
    abstract: str | None = None
    type: str | None = None
    # Tri-state: None means the refereed status is not set.
    refereed: bool | None = None
    open_access: bool = False
    url: str | None = None

build_author_dashboard

build_author_dashboard(metrics: AuthorMetrics) -> object

Build the initial multi-plot dashboard for an author.

Parameters:

Name Type Description Default
metrics AuthorMetrics

Computed author metrics.

required

Returns:

Type Description
object

A Bokeh layout containing the available plots.

Source code in src/papertrail/plots/bokeh_plotter.py
def build_author_dashboard(metrics: AuthorMetrics) -> object:
    """Build the initial multi-plot dashboard for an author.

    The dashboard is an HTML header (name, total publications, total
    citations, h-index) above a tab strip.  The "Publications/Year" and
    "Citations/Year" tabs are always present; every other tab is added only
    when its builder returns a plot rather than ``None``.

    Args:
        metrics: Computed author metrics.

    Returns:
        A Bokeh layout containing the available plots.
    """
    # Imported locally so this function brings its own dependency.
    from html import escape

    # The Div text is HTML, so the free-text author name is escaped to keep
    # characters such as "<" or "&" from being interpreted as markup.
    header = Div(
        text=(
            f"<h1>{escape(metrics.author_name)}</h1>"
            f"<p>Total publications: {metrics.total_publications} | "
            f"Total citations: {metrics.total_citations} | "
            f"h-index: {metrics.h_index}</p>"
        )
    )

    tabs: list[TabPanel] = [
        TabPanel(title="Publications/Year", child=build_publications_per_year_plot(metrics)),
        TabPanel(title="Citations/Year", child=build_citations_per_year_plot(metrics)),
    ]

    def add_optional_tab(title: str, plot: object) -> None:
        """Append a tab for *plot* unless its builder returned ``None``."""
        if plot is not None:
            tabs.append(TabPanel(title=title, child=plot))

    add_optional_tab("Refereed Split", build_refereed_breakdown_plot(metrics))

    # Refereed vs. non-refereed comparisons.  Loop nesting fixes the tab
    # order: per value, totals before normalized, grouped before stacked.
    for value, value_label in (("publications", "Pubs"), ("citations", "Cites")):
        for normalized, scale_label in ((False, "Total"), (True, "Norm")):
            for mode in ("grouped", "stacked"):
                add_optional_tab(
                    f"{value_label} {mode.capitalize()} {scale_label}",
                    build_refereed_year_comparison_plot(
                        metrics,
                        value=value,
                        mode=mode,
                        normalized=normalized,
                    ),
                )

    # Remaining optional tabs, in display order.
    optional_builders = (
        ("Indices Over Time", build_index_timeseries_plots),
        ("Indices Snapshot", build_index_snapshot_plot),
        ("Citation Distribution", build_citation_distribution_plot),
        ("Publication Types", build_publication_type_breakdown_plot),
        ("Top Venues", build_top_journals_plot),
    )
    for title, builder in optional_builders:
        add_optional_tab(title, builder(metrics))

    tabs_view = Tabs(tabs=tabs, sizing_mode="stretch_width")
    return column(header, tabs_view, sizing_mode="stretch_width")

build_citations_per_year_plot

build_citations_per_year_plot(
    metrics: AuthorMetrics,
) -> object

Build an interactive line chart of citations per publication year.

Parameters:

Name Type Description Default
metrics AuthorMetrics

Computed author metrics.

required

Returns:

Type Description
object

A Bokeh figure.

Source code in src/papertrail/plots/bokeh_plotter.py
def build_citations_per_year_plot(metrics: AuthorMetrics) -> object:
    """Render the yearly citation counts as a line chart with point markers.

    Args:
        metrics: Computed author metrics; ``citations_per_year`` is plotted.

    Returns:
        The configured Bokeh figure.
    """
    accent = "#C05621"
    hover_rows = [("Year", "@year"), ("Citations", COUNT_FIELD)]

    year_values, citation_counts = _sorted_year_mapping(metrics.citations_per_year)
    data = ColumnDataSource({"year": year_values, "count": citation_counts})

    chart = figure(
        title="Citations per year",
        x_axis_label="Year",
        y_axis_label="Citations",
        sizing_mode="stretch_width",
        height=320,
        tools=INTERACTIVE_TOOLS,
    )
    # Draw the connecting line first, then overlay one marker per data point.
    chart.line(x="year", y="count", source=data, color=accent, line_width=3)
    chart.scatter(x="year", y="count", source=data, color=accent, size=8)
    chart.add_tools(HoverTool(tooltips=hover_rows))
    # Pin the x-axis ticks to exactly the years present in the data.
    chart.xaxis.ticker = year_values
    return chart

build_publications_per_year_plot

build_publications_per_year_plot(
    metrics: AuthorMetrics,
) -> object

Build an interactive bar chart of publications per year.

Parameters:

Name Type Description Default
metrics AuthorMetrics

Computed author metrics.

required

Returns:

Type Description
object

A Bokeh figure.

Source code in src/papertrail/plots/bokeh_plotter.py
def build_publications_per_year_plot(metrics: AuthorMetrics) -> object:
    """Render the yearly publication counts as a vertical bar chart.

    Args:
        metrics: Computed author metrics; ``publications_per_year`` is plotted.

    Returns:
        The configured Bokeh figure.
    """
    hover_rows = [("Year", "@year"), ("Publications", COUNT_FIELD)]

    year_values, publication_counts = _sorted_year_mapping(
        metrics.publications_per_year
    )
    data = ColumnDataSource({"year": year_values, "count": publication_counts})

    chart = figure(
        title="Publications per year",
        x_axis_label="Year",
        y_axis_label="Publications",
        sizing_mode="stretch_width",
        height=320,
        tools=INTERACTIVE_TOOLS,
    )
    chart.vbar(x="year", top="count", source=data, width=0.8, color="#2B6CB0")
    chart.add_tools(HoverTool(tooltips=hover_rows))
    # Pin the x-axis ticks to exactly the years present in the data.
    chart.xaxis.ticker = year_values
    return chart

build_refereed_breakdown_plot

build_refereed_breakdown_plot(
    metrics: AuthorMetrics,
) -> object | None

Build a bar chart comparing refereed and non-refereed publication counts.

Parameters:

Name Type Description Default
metrics AuthorMetrics

Computed author metrics.

required

Returns:

Type Description
object | None

A Bokeh figure when refereed metadata is available, otherwise None.

Source code in src/papertrail/plots/bokeh_plotter.py
def build_refereed_breakdown_plot(metrics: AuthorMetrics) -> object | None:
    """Render a two-bar chart of refereed versus non-refereed publications.

    Args:
        metrics: Computed author metrics; ``refereed_publications`` and
            ``non_refereed_publications`` supply the two bar heights.

    Returns:
        The configured Bokeh figure, or ``None`` when either of the two
        counts is ``None``.
    """
    # Both counts are required; bail out as soon as one is missing.
    if metrics.refereed_publications is None:
        return None
    if metrics.non_refereed_publications is None:
        return None

    categories = ["Refereed", "Non-refereed"]
    data = ColumnDataSource(
        {
            "label": categories,
            "count": [
                metrics.refereed_publications,
                metrics.non_refereed_publications,
            ],
            # One colour per category, used for both fill and outline.
            "color": ["#2F855A", "#718096"],
        }
    )

    chart = figure(
        x_range=categories,
        title="Refereed breakdown",
        x_axis_label="Category",
        y_axis_label="Publications",
        sizing_mode="stretch_width",
        height=320,
        tools=INTERACTIVE_TOOLS,
    )
    chart.vbar(
        x="label",
        top="count",
        source=data,
        width=0.6,
        fill_color="color",
        line_color="color",
    )
    hover_rows = [("Category", "@label"), ("Count", COUNT_FIELD)]
    chart.add_tools(HoverTool(tooltips=hover_rows))
    return chart

export_dashboard

export_dashboard(
    metrics: AuthorMetrics,
    path: str | Path,
    *,
    fmt: PlotFormat,
) -> None

Export the default dashboard as HTML, JSON, PNG, or PDF.

Parameters:

Name Type Description Default
metrics AuthorMetrics

Computed author metrics.

required
path str | Path

Output file path.

required
fmt PlotFormat

One of "html", "json", "png", or "pdf".

required

Raises:

Type Description
ExportError

If the output cannot be written.

Source code in src/papertrail/plots/bokeh_plotter.py
def export_dashboard(
    metrics: AuthorMetrics,
    path: str | Path,
    *,
    fmt: PlotFormat,
) -> None:
    """Write the default author dashboard to disk in the requested format.

    Args:
        metrics: Computed author metrics used to build the dashboard.
        path: Output file path.
        fmt: One of ``"html"``, ``"json"``, ``"png"``, or ``"pdf"``.

    Raises:
        ExportError: If *fmt* is not one of the supported formats, or if an
            ``OSError`` occurs while writing the output.
    """
    layout = build_author_dashboard(metrics)
    target = Path(path)
    try:
        if fmt == "html":
            # NOTE(review): output_file() is invoked before save(), presumably
            # to configure Bokeh's output state for the target - confirm.
            output_file(target)
            page_title = f"papertrail - {metrics.author_name}"
            save(layout, filename=target, title=page_title)
        elif fmt == "json":
            item = json_item(layout, target="papertrail-dashboard")
            serialized = json.dumps(item, indent=2)
            target.write_text(serialized, encoding="utf-8")
        elif fmt == "png":
            _export_png_dashboard(layout, target)
        elif fmt == "pdf":
            _export_pdf_dashboard(layout, target)
        else:
            raise ExportError(f"Unsupported plot export format: {fmt!r}")
    except OSError as err:
        # Surface filesystem failures as the package's own export error.
        raise ExportError(f"Could not write plot output to '{target}'") from err