## Using this python code, you can give a list of cities in an excel file & a list of destinations (defined in the code)
## and the code will call carbontracter and  give you back another excel file with co2 emissions for flight & train travel 
## between the cities. The first half of the code defines functions and imports packages that will be used


##### Import packages & define functions

import pandas as pd
import matplotlib.pyplot as plt
import time
from pycarbontracer import CarbonTracer

## Rotates the API keys so that the max. number of calls per hour is not exceeded for any of them individually
def get_api_key():
    global api_call_count, current_key_index
    if api_call_count >= 1900:  # Rotate the key after 1900 calls
        current_key_index = (current_key_index + 1) % len(API_KEYS)
        api_call_count = 0
    api_call_count += 1
    return API_KEYS[current_key_index]



## Function to get CO2 data with caching; The main function that gets the data from Carbontracer
def get_co2_data(transport_mode, origin, destination):
    global ct, co2_cache
    ct = CarbonTracer(get_api_key())

    # Generate a unique key for caching; This part is to mainly check if some combination of origin-destination has
    # already been checked. This avoids calling the API for the same combiation multiple times and speeds up the analysis
    cache_key = f"{transport_mode}_{origin}_{destination}"
    print(cache_key)
    if cache_key in co2_cache:
        print("Using cached data")
        return co2_cache[cache_key]  # Return cached result if available

    ## In the part below, we get the data from carbontracer; It check if the response was a success or else it spits
    ## out an error. It's common to see this error. It might happen if there are no train connection possible
    ## or if there are no flight connections as well as if the cities are not identified
    retries = 1
    while retries > 0:
        try:
            result = ct.routing(transport_mode, origin, destination)
            time.sleep(0.5)  # Respect API rate limit
            if result['response']['success']:
                data = result['response']['data']
                co2_data = {
                    'co2eq': data['aCO2eq']['economy'] if transport_mode == "flight" else data['co2eq'],
                    'distance_direct': data['distanceDirect'],
                    'distance_route': data['distanceRoute']
                }
                co2_cache[cache_key] = co2_data  # Cache the result
                return co2_data
        except Exception as e:
            print(f"Error for {transport_mode} from {origin} to {destination}: {e}")
        retries -= 1
    print(f"Failed after retries for {transport_mode} from {origin} to {destination}")
    return None


############ The main part of the code where you can make changes based on your file and requirements  #####

## API keys for CarbonTracer; I have used the three API keys I have. 
## There are limitations to how fast and how many times the API can be called
API_KEYS = ["nPfnGW8Mv9NZT2jzpWpAfXSBHDYKfWk8xWhVhQbe", "IYZjmzSd2W3K5SLhvDiZpvDQrnqL4WNM1IZ47tF1", "cKkgSHBk3QBKDuJFvrA9kj4wk5tcL2PcweHQH55L"]  # Replace with your actual API keys
current_key_index = 0
api_call_count = 0

# Initialize CarbonTracer
ct = CarbonTracer(get_api_key())

# Cache to store results; It basically stores all the information that we get from Carbontracer
co2_cache = {}

# Read Excel file
## This is the input file, it assumes the file is in the same folder from where you are running the python code
## So you will need to enter the full address in case it is in another folder
input_file = "Participants_ENC_table-1.xlsx" 
df = pd.read_excel(input_file)

print("File read") ## To keep track of the code and errors

# Fixed destinations; Here you can give a list of destinations for which you want to analyse your file
destinations = ["San Francisco"]

# Prepare columns for results to be saved in the new excel file
for destination in destinations:
    for col in ['co2eq', 'distance_direct', 'distance_route']:
        for mode in ['flight', 'train']:
            df[f"{mode}_{col}_{destination}"] = None

# Process each destination; Heart of the code which iterates over the file and calls the functions
for destination in destinations:
    for i, row in df.iterrows():
        origin = row['Airport'] ### IMPORTANT: Enter the column name for the list of cities;  case-sensitive
        print(f"\nChecking from {origin} to {destination}")
        flight_data = get_co2_data("flight", origin, destination)
        train_data = get_co2_data("train", origin, destination)

        if flight_data:
            print(f"Success: flight from {origin} to {destination}")
            df.at[i, f'flight_co2eq_{destination}'] = flight_data['co2eq']
            df.at[i, f'flight_distance_direct_{destination}'] = flight_data['distance_direct']
            df.at[i, f'flight_distance_route_{destination}'] = flight_data['distance_route']
        if train_data:
            print(f"Success: train from {origin} to {destination}")
            df.at[i, f'train_co2eq_{destination}'] = train_data['co2eq']
            df.at[i, f'train_distance_direct_{destination}'] = train_data['distance_direct']
            df.at[i, f'train_distance_route_{destination}'] = train_data['distance_route']

# Calculate total CO2 emissions grouped by year for each destination
for destination in destinations:
    # Fill NaN with 0 for both flight and train
    df[f'flight_co2eq_{destination}'] = df[f'flight_co2eq_{destination}'].fillna(0)
    df[f'train_co2eq_{destination}'] = df[f'train_co2eq_{destination}'].fillna(0)

    # Set total_co2eq: train if non-zero, otherwise flight
    df[f'total_co2eq_{destination}'] = df[f'train_co2eq_{destination}'].where(
        df[f'train_co2eq_{destination}'] != 0,
        df[f'flight_co2eq_{destination}']
    )
#     co2_by_year = df.groupby('year')[f'total_co2eq_{destination}'].sum()

#     # Plot CO2 emissions by year
#     plt.figure()
#     co2_by_year.plot(kind='bar', title=f"Total CO2 Emissions for {destination} by Year")
#     plt.ylabel("Total CO2 (kg)")
#     plt.xlabel("Year")
#     plt.tight_layout()
#     plt.show()

# Save updated DataFrame
output_file = "ENC - SF.xlsx" ## Output file name; it will overwrite if you use an existing filename
df.to_excel(output_file, index=False)
print(f"Updated data saved to {output_file}")