Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions bcb/currency.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,27 @@ def _get_symbol_text(
CurrencyOutput = Literal["dataframe", "text"]


def _tidy_df(df: pd.DataFrame, side: CurrencySide) -> pd.DataFrame:
    """Reshape the wide currency frame into long ("tidy") form.

    Every column label of ``df`` is unpacked into a ``(symbol, side)`` pair,
    and each selected column contributes one run of rows to the result.

    Parameters
    ----------
    df : pd.DataFrame
        Wide frame; its index supplies the ``Date`` values.
    side : CurrencySide
        ``"both"`` keeps every column. Any other value keeps only the
        columns whose second label equals it.

    Returns
    -------
    pd.DataFrame
        Frame with columns ``Date``, ``symbol``, ``side`` and ``value`` and a
        fresh integer index. It has no rows when no column is selected.
    """
    keep_all = side == "both"
    # Columns are read by position so repeated labels stay distinct.
    pieces = [
        pd.DataFrame(
            {
                "Date": df.index,
                "symbol": symbol,
                "side": rate_side,
                "value": df.iloc[:, pos].to_numpy(),
            }
        )
        for pos, (symbol, rate_side) in enumerate(df.columns)
        if keep_all or rate_side == side
    ]
    if pieces:
        return pd.concat(pieces, ignore_index=True)
    return pd.DataFrame(columns=["Date", "symbol", "side", "value"])


def _normalize_currency_symbols(symbols: Union[str, List[str]]) -> List[str]:
if isinstance(symbols, str):
symbols = [symbols]
Expand Down Expand Up @@ -634,6 +655,7 @@ def get(
side: CurrencySide = ...,
groupby: CurrencyGroupBy = ...,
output: Literal["dataframe"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> pd.DataFrame: ...
Expand All @@ -647,6 +669,7 @@ def get(
side: CurrencySide = ...,
groupby: CurrencyGroupBy = ...,
output: Literal["dataframe"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> pd.DataFrame: ...
Expand All @@ -660,6 +683,7 @@ def get(
side: CurrencySide = ...,
groupby: CurrencyGroupBy = ...,
output: Literal["text"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> str: ...
Expand All @@ -673,6 +697,7 @@ def get(
side: CurrencySide = ...,
groupby: CurrencyGroupBy = ...,
output: Literal["text"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> CurrencyTextResult: ...
Expand All @@ -685,6 +710,7 @@ def get(
side: CurrencySide = "ask",
groupby: CurrencyGroupBy = "symbol",
output: CurrencyOutput = "dataframe",
tidy: bool = False,
*,
timeout: RequestTimeout = None,
) -> Union[pd.DataFrame, str, Dict[str, str]]:
Expand Down Expand Up @@ -712,6 +738,11 @@ def get(
por ``side``.
output : {"dataframe", "text"}, default "dataframe"
Define o formato de saída. Use ``"text"`` para retornar o CSV bruto.
tidy : bool, default False
Quando ``True`` e ``output='dataframe'``, retorna um DataFrame em
formato tidy com colunas ``Date``, ``symbol``, ``side`` e ``value``.
Quando ``False``, mantém o formato largo padrão. Não altera
``output='text'``.
timeout : float or httpx.Timeout, optional
Timeout por requisição HTTP, em segundos ou como ``httpx.Timeout``.
Quando omitido, usa o timeout padrão do cliente compartilhado.
Expand Down Expand Up @@ -756,6 +787,8 @@ def get(
pass # Skip missing currencies
if len(dss) > 0:
df = pd.concat(dss, axis=1)
if tidy:
return _tidy_df(df, side)
if side in ("bid", "ask"):
dx = df.reorder_levels([1, 0], axis=1).sort_index(axis=1)
return dx[side]
Expand Down Expand Up @@ -956,6 +989,7 @@ async def async_get(
side: CurrencySide = "ask",
groupby: CurrencyGroupBy = "symbol",
output: CurrencyOutput = "dataframe",
tidy: bool = False,
*,
timeout: RequestTimeout = None,
) -> Union[pd.DataFrame, str, Dict[str, str]]:
Expand All @@ -982,6 +1016,11 @@ async def async_get(
``'symbol'`` ou ``'side'``
output : {"dataframe", "text"}
``'dataframe'`` ou ``'text'``
tidy : bool, default False
Quando ``True`` e ``output='dataframe'``, retorna um DataFrame em
formato tidy com colunas ``Date``, ``symbol``, ``side`` e ``value``.
Quando ``False``, mantém o formato largo padrão. Não altera
``output='text'``.
timeout : float or httpx.Timeout, optional
Timeout por requisição HTTP, em segundos ou como ``httpx.Timeout``.
Quando omitido, usa o timeout padrão do cliente compartilhado.
Expand Down Expand Up @@ -1030,6 +1069,8 @@ async def async_get(

if len(valid_dss) > 0:
df = pd.concat(valid_dss, axis=1)
if tidy:
return _tidy_df(df, side)
if side in ("bid", "ask"):
dx = df.reorder_levels([1, 0], axis=1).sort_index(axis=1)
return dx[side]
Expand Down
34 changes: 34 additions & 0 deletions bcb/sgs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ def _format_df(df: pd.DataFrame, code: SGSCode, freq: Optional[str]) -> pd.DataF
return df


def _tidy_df(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape a wide SGS frame into long ("tidy") form.

    Each column of ``df`` contributes one run of rows, tagged with that
    column's label.

    Parameters
    ----------
    df : pd.DataFrame
        Wide frame; its index supplies the ``Date`` values.

    Returns
    -------
    pd.DataFrame
        Frame with columns ``Date``, ``series`` and ``value`` and a fresh
        integer index. It has no rows when ``df`` has no columns.
    """
    # Columns are read by position so repeated labels stay distinct.
    pieces = [
        pd.DataFrame(
            {
                "Date": df.index,
                "series": label,
                "value": df.iloc[:, pos].to_numpy(),
            }
        )
        for pos, label in enumerate(df.columns)
    ]
    if pieces:
        return pd.concat(pieces, ignore_index=True)
    return pd.DataFrame(columns=["Date", "series", "value"])


@overload
def get(
codes: SGSCodeInput,
Expand All @@ -268,6 +285,7 @@ def get(
multi: bool = ...,
freq: Optional[str] = ...,
output: Literal["dataframe"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> Union[pd.DataFrame, List[pd.DataFrame]]: ...
Expand All @@ -282,6 +300,7 @@ def get(
multi: bool = ...,
freq: Optional[str] = ...,
output: Literal["text"] = ...,
tidy: bool = ...,
*,
timeout: RequestTimeout = ...,
) -> Union[str, Dict[int, str]]: ...
Expand All @@ -295,6 +314,7 @@ def get(
multi: bool = True,
freq: Optional[str] = None,
output: Literal["dataframe", "text"] = "dataframe",
tidy: bool = False,
*,
timeout: RequestTimeout = None,
) -> Union[pd.DataFrame, List[pd.DataFrame], str, Dict[int, str]]:
Expand Down Expand Up @@ -334,6 +354,10 @@ def get(
um DataFrame pandas, ou ``'text'`` para retornar o JSON bruto da API
do BCB. Para um único código retorna uma string; para múltiplos
códigos retorna um ``dict`` mapeando código inteiro → JSON string.
tidy : bool, default False
Quando ``True`` e ``output='dataframe'``, retorna um DataFrame em
formato tidy com colunas ``Date``, ``series`` e ``value``. Quando
``False``, mantém o formato largo padrão. Não altera ``output='text'``.
timeout : float or httpx.Timeout, optional
Timeout por tentativa HTTP, em segundos ou como ``httpx.Timeout``.
Quando omitido, usa o timeout padrão do cliente compartilhado.
Expand Down Expand Up @@ -376,6 +400,9 @@ def get(
df = pd.read_json(StringIO(text))
df = _format_df(df, code, freq)
dfs.append(df)

if tidy:
return _tidy_df(pd.concat(dfs, axis=1))
if len(dfs) == 1:
return dfs[0]
else:
Expand Down Expand Up @@ -509,6 +536,7 @@ async def async_get(
multi: bool = True,
freq: Optional[str] = None,
output: Literal["dataframe", "text"] = "dataframe",
tidy: bool = False,
*,
timeout: RequestTimeout = None,
) -> Union[pd.DataFrame, List[pd.DataFrame], str, Dict[int, str]]:
Expand Down Expand Up @@ -536,6 +564,10 @@ async def async_get(
Frequência a ser utilizada na série temporal
output : str
Formato de saída: ``'dataframe'`` ou ``'text'``
tidy : bool, default False
Quando ``True`` e ``output='dataframe'``, retorna um DataFrame em
formato tidy com colunas ``Date``, ``series`` e ``value``. Quando
``False``, mantém o formato largo padrão. Não altera ``output='text'``.
timeout : float or httpx.Timeout, optional
Timeout por tentativa HTTP, em segundos ou como ``httpx.Timeout``.
Quando omitido, usa o timeout padrão do cliente compartilhado.
Expand Down Expand Up @@ -566,6 +598,8 @@ async def async_get(
_format_df(pd.read_json(StringIO(t)), c, freq)
for c, t in zip(code_list, texts, strict=True)
]
if tidy:
return _tidy_df(pd.concat(dfs, axis=1))
if len(dfs) == 1:
return dfs[0]
else:
Expand Down
5 changes: 5 additions & 0 deletions docs/async.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ Busca taxas de câmbio de forma assíncrona com a mesma interface que a versão

asyncio.run(main())

As versões assíncronas aceitam os mesmos formatos de saída das versões
síncronas. Em ``sgs.async_get(..., tidy=True)``, o DataFrame tem colunas
``Date``, ``series`` e ``value``. Em ``currency.async_get(..., tidy=True)``, o
DataFrame tem colunas ``Date``, ``symbol``, ``side`` e ``value``.

OData: async_collect()
----------------------

Expand Down
18 changes: 18 additions & 0 deletions docs/currency.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ em cache.

currency.get_currency_list().head()

Formato tidy em moedas
^^^^^^^^^^^^^^^^^^^^^^

Por padrão, :py:func:`bcb.currency.get` retorna um DataFrame no formato largo.
Para retornar uma tabela longa, use ``tidy=True``. Nesse modo, o DataFrame tem
as colunas ``Date``, ``symbol``, ``side`` e ``value``.

.. ipython:: python

currency.get(['USD', 'EUR'],
start='2024-01-01',
end='2024-01-31',
side='both',
tidy=True).head()

O parâmetro ``tidy`` só afeta o retorno ``output='dataframe'``. Quando
``output='text'`` é usado, a função continua retornando o CSV bruto.

Obtendo o CSV bruto
^^^^^^^^^^^^^^^^^^^

Expand Down
18 changes: 18 additions & 0 deletions docs/sgs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,24 @@ Se a consulta continuar lenta mesmo com timeout maior, divida o período em
janelas menores e concatene os resultados.


Formato tidy no SGS
-------------------

Por padrão, :py:func:`bcb.sgs.get` retorna um DataFrame no formato largo. Para
retornar uma tabela longa, use ``tidy=True``. Nesse modo, o DataFrame tem as
colunas ``Date``, ``series`` e ``value``. A coluna ``series`` usa o nome
informado em ``codes``; quando nenhum nome é informado, usa o código numérico
da série.

.. ipython:: python

from bcb import sgs
sgs.get({'SELIC': 11, 'IPCA': 433}, start='2024-01-01', tidy=True).head()

O parâmetro ``tidy`` só afeta o retorno ``output='dataframe'``. Quando
``output='text'`` é usado, a função continua retornando o JSON bruto.


Exemplos
--------

Expand Down
50 changes: 50 additions & 0 deletions tests/sgs/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
r".*bcdata\.sgs\.1[^0-9].*|.*bcdata\.sgs\.1$|.*bcdata\.sgs\.1/.*"
)
SGS_CODE_99999_URL = re.compile(r".*bcdata\.sgs\.99999.*")
# Matches request URLs containing ``bcdata.sgs.2`` only when the code is not
# followed by another digit (a non-digit character, a slash, or the end of the
# string), so codes that merely start with 2 are not matched.
SGS_CODE_2_URL = re.compile(
r".*bcdata\.sgs\.2[^0-9].*|.*bcdata\.sgs\.2$|.*bcdata\.sgs\.2/.*"
)


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -133,6 +136,53 @@ def test_get_with_named_code(httpx_mock):
assert len(df) == 5


def test_get_tidy_single_code(httpx_mock):
    """A single code fetched with ``tidy=True`` yields one long-format frame."""
    httpx_mock.add_response(url=SGS_CODE_1_URL, text=SGS_JSON_5, status_code=200)

    tidy_frame = sgs.get(1, last=5, tidy=True)

    assert isinstance(tidy_frame, pd.DataFrame)
    assert tidy_frame.columns.tolist() == ["Date", "series", "value"]
    # The series label is the code rendered as a string.
    assert tidy_frame["series"].unique().tolist() == ["1"]
    assert len(tidy_frame) == 5


def test_get_tidy_multiple_codes(httpx_mock):
    """Two codes fetched with ``tidy=True`` are stacked into one long frame."""
    # Register the same five-row payload for each requested code, in order.
    for mocked_url in (SGS_CODE_1_URL, SGS_CODE_2_URL):
        httpx_mock.add_response(url=mocked_url, text=SGS_JSON_5, status_code=200)

    tidy_frame = sgs.get([1, 2], last=5, tidy=True)

    assert tidy_frame.columns.tolist() == ["Date", "series", "value"]
    assert set(tidy_frame["series"]) == {"1", "2"}
    # Five rows per series, two series.
    assert len(tidy_frame) == 10


def test_get_tidy_ignores_text_output(httpx_mock):
    """With ``output="text"`` the raw payload is returned even if ``tidy=True``."""
    httpx_mock.add_response(url=SGS_CODE_1_URL, text=SGS_JSON_5, status_code=200)

    raw = sgs.get(1, last=5, output="text", tidy=True)

    assert isinstance(raw, str)
    assert raw == SGS_JSON_5


def test_get_json_error_response(httpx_mock):
httpx_mock.add_response(
url=SGS_CODE_99999_URL,
Expand Down
31 changes: 31 additions & 0 deletions tests/test_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,25 @@ async def test_async_get_multiple_codes_concurrent(httpx_mock):
assert df.shape[1] == 2


async def test_async_get_tidy_multiple_codes(httpx_mock):
    """``async_get`` with ``tidy=True`` stacks two codes into one long frame."""
    # One mocked response per requested code; both use the same URL matcher.
    for _ in range(2):
        httpx_mock.add_response(url=SGS_CODE_URL, text=SGS_JSON_5, status_code=200)

    tidy_frame = await sgs.async_get([1, 11], tidy=True)

    assert tidy_frame.columns.tolist() == ["Date", "series", "value"]
    assert set(tidy_frame["series"]) == {"1", "11"}
    # Five rows per series, two series.
    assert len(tidy_frame) == 10


async def test_async_get_text_output(httpx_mock):
"""Test async_get() with output='text' returns JSON string."""
httpx_mock.add_response(
Expand Down Expand Up @@ -194,6 +213,18 @@ async def test_async_get_single_symbol_returns_dataframe(httpx_mock):
assert df is not None


async def test_async_get_tidy_single_symbol(httpx_mock):
    """``currency.async_get`` with ``tidy=True`` returns bid and ask rows."""
    add_currency_base_mocks(httpx_mock)
    add_currency_rate_mock(httpx_mock)

    tidy_frame = await currency.async_get(
        "USD", START, END, side="both", tidy=True
    )

    assert tidy_frame.columns.tolist() == ["Date", "symbol", "side", "value"]
    assert tidy_frame["symbol"].unique().tolist() == ["USD"]
    assert set(tidy_frame["side"]) == {"bid", "ask"}
    assert len(tidy_frame) == 10


async def test_async_get_invalid_currency_side_raises():
with pytest.raises(ValueError, match="Unknown side"):
await currency.async_get("USD", START, END, side="mid") # type: ignore[arg-type]
Expand Down
Loading
Loading