import os
import pandas as pd
import numpy as np
import pytz
import refinitiv.data as rd
from datetime import datetime, timedelta, time
from time import sleep
# Suppress downcasting warnings
pd.set_option('future.no_silent_downcasting', True)
# ==========================================================
# 1️⃣ SESSION
# ==========================================================
rd.open_session()
# ==========================================================
# 2️⃣ CONFIG
# ==========================================================
grains_dict = {
"W":10, "KW":10, "C":10, "S":14, "SM":16,
"BO":16, "BL2":8, "COM":8, "EMA":6, "RS":8
}
com_dic = {
"W":"Chicago Wheat", "KW":"Kansas Wheat", "C":"Corn",
"S":"Soybean", "SM":"Soybean Meal", "BO":"Soybean Oil",
"RS":"ICE Canola",
"BL2":"Milling Wheat", "COM":"Rapeseed", "EMA":"Matif Corn"
}
symbols = list(grains_dict.keys())
mn_codes = {
"F":"jan", "G":"feb", "H":"mar", "J":"apr", "K":"may", "M":"jun",
"N":"jul", "Q":"aug", "U":"sep", "V":"oct", "X":"nov", "Z":"dec"
}
MONTH_CODES = list(mn_codes.keys())
# ==========================================================
# 3️⃣ YEAR FORMAT
# ==========================================================
def year_suffix(commodity, year):
single_digit = ["RS", "BL2", "COM", "EMA"]
return str(year)[-1] if commodity in single_digit else str(year)[-2:]
# ==========================================================
# 4️⃣ OUTRIGHT + SPREAD GENERATION
# ==========================================================
def generate_outrights(s, n_contracts):
today = datetime.today()
year = today.year
month_index = today.month - 1
rics = []
for i in range(n_contracts):
m_code = MONTH_CODES[(month_index + i) % 12]
y = year + ((month_index + i) // 12)
suffix = year_suffix(s, y)
ric = f"{s}{m_code}{suffix}"
rics.append(ric)
return rics
def generate_spreads(s, outrights):
spreads = []
for i in range(len(outrights)-1):
for j in range(i+1, len(outrights)):
right_leg = outrights[j].replace(s, "")
spreads.append(f"{outrights[i]}-{right_leg}")
return spreads
# ==========================================================
# 5️⃣ CHUNK FUNCTION
# ==========================================================
def chunkIt(seq, num):
avg = len(seq) / float(num)
out = []
last = 0.0
while last < len(seq):
out.append(seq[int(last):int(last + avg)])
last += avg
return out
# ==========================================================
# 6️⃣ DST
# ==========================================================
def is_dst(time_stamp, region="US"):
dt = datetime(time_stamp.year, time_stamp.month, time_stamp.day)
timezone = pytz.timezone("US/Eastern") if region=="US" else pytz.timezone("Europe/London")
aware_dt = timezone.localize(dt)
return aware_dt.dst() != timedelta(0)
# ==========================================================
# 7️⃣ MAIN FUNCTION (UPDATED FOR MINUTE DATA)
# ==========================================================
def settle_vol(s):
print(f"Processing {s}")
n = grains_dict[s]
outrights = generate_outrights(s, n)
ric_list = generate_spreads(s, outrights)
if not ric_list:
print("No spreads generated")
return
start = datetime.now() - timedelta(days=365)
end = datetime.now()
df = pd.DataFrame()
cks = 4
chunks = chunkIt(ric_list, cks)
for chunk in chunks:
try:
# CHANGED: Use 'VOLUME' to ensure minute-level data is returned
temp_df = rd.get_history(
universe=chunk,
fields=["TR.ACCUMULATEDVOLUME"],
interval="1min",
start=start,
end=end
)
if temp_df is None or temp_df.empty:
print("No data for chunk:", chunk)
continue
df = pd.concat([df, temp_df], axis=1)
except Exception as err:
print("Error fetching chunk:", chunk)
print(err)
continue
if df.empty:
print(f"No usable data for {s}")
return
df.index = pd.to_datetime(df.index)
df["Date"] = df.index.date
print(f"Index for {s}:", df.index[:5])
if s in ["BL2", "COM", "EMA"]:
df["isDST"] = df["Date"].apply(lambda x: is_dst(x, "Lon"))
df["Settle"] = np.where(df.isDST, time(16,30), time(17,30))
else:
df["isDST"] = df["Date"].apply(lambda x: is_dst(x))
df["Settle"] = np.where(df.isDST, time(18,15), time(19,15))
# Match exact minute
df = df[df.index.time == df.Settle]
if df.empty:
print(f"No settlement rows for {s}")
return
df.drop(["Date", "isDST", "Settle"], axis=1, inplace=True)
df.index = df.index.date
df = df.infer_objects(copy=False).fillna(0) # Fixes FutureWarning
if len(df) < 16:
print(f"Not enough rows for {s}")
return
df.loc["mean5"] = df.iloc[-16:-6].mean()
df.loc["mean4"] = df.iloc[-16:-6].mean()
df.loc["mean3"] = df.iloc[-16:-6].mean()
df.loc["mean2"] = df.iloc[-16:-6].mean()
df.loc["mean1"] = df.iloc[-16:-6].mean()
df = df.iloc[-10:]
df.columns = df.columns.str.replace(s, "")
for old, new in mn_codes.items():
df.columns = df.columns.str.replace(old, new, regex=False)
df.columns = df.columns.str.title()
df.fillna(0, inplace=True)
df.to_csv(f"volume_{s}.csv")
print(f"{s} done")
this code interval=1min it is returning date only and confused with what field to use in this this tab is for settlement aberration.in code df.index.time=df.settle it is comparing with time and returning no data . so what to do or should i use interval=tas and resample for 1min or what to do the code is failing when comapring with df.settle