import ppoConfig from '@/rl/PPOConfig';

/**
 * Gym-style environment wrapper around the telemetry of a single robot.
 *
 * The environment keeps the latest robot snapshot (`this.robot`), turns it
 * into a fixed-length observation vector, and scores each step according to
 * the current training aim ('disperse', 'gather' or 'explore').
 */
export default class MultiRobotEnvironment {
  /**
   * @param {*} robotId Identifier used for the robot whenever incoming robot
   *   data does not carry its own `id`.
   */
  constructor(robotId) {
    this.robotId = robotId;
    this.robot = this.initializeRobot({});
    this.numberOfSteps = 1;
    this.episode = 1;

    this.actionSpace = {
      'class': 'Discrete',
      'n': 5, // Possible actions: forward, backward, left, right, stop
    };

    this.observationSpace = {
      'class': 'Box',
      'shape': ppoConfig.inputShape, // 3 base features + 10 objects * 3 features each
      'dtype': 'float32'
    };

    this.trainingAim = 'explore';
  }

  /**
   * Builds a fresh robot record, filling every missing field with a default.
   * Action-tracking fields (`lastAction`, `consecutiveSameActions`) always
   * start from scratch regardless of the input.
   *
   * @param {Object} [robot={}] Partial robot data.
   * @returns {Object} Normalised robot record.
   */
  initializeRobot(robot = {}) {
    return {
      id: robot.id || this.robotId,
      status: robot.status || 'unknown',
      totalDistance: robot.totalDistance || 0,
      distance: robot.distance || 0,
      objects: Array.isArray(robot.objects) ? robot.objects : [],
      isSimulated: robot.isSimulated || false,
      lastAction: null,
      consecutiveSameActions: 0,
      i: 0
    };
  }

  /**
   * Merges new robot data into the current record. `objects` is only replaced
   * when the incoming value is an array; otherwise the previous list is kept.
   * A missing `robotData` leaves the record unchanged.
   *
   * @param {Object} robotData Partial robot data to merge.
   */
  updateRobot(robotData) {
    const incoming = robotData || {};
    this.robot = {
      ...this.robot,
      ...incoming,
      objects: Array.isArray(incoming.objects) ? incoming.objects : this.robot.objects
    };
  }

  /**
   * Sanity-checks the action and observation space descriptors and logs them.
   *
   * @throws {Error} When either descriptor is missing a required field.
   */
  validateEnvironment() {
    console.log('Validating environment setup...');
    if (!this.actionSpace || !this.actionSpace.class || !this.actionSpace.n) {
      throw new Error('Invalid actionSpace setup');
    }
    console.log('actionSpace:', this.actionSpace);

    if (!this.observationSpace || !this.observationSpace.class || !this.observationSpace.shape) {
      throw new Error('Invalid observationSpace setup');
    }
    console.log('observationSpace:', this.observationSpace);

    console.log('Environment validation successful');
  }

  /**
   * Advances the environment by one step.
   *
   * The action is recorded BEFORE the reward and observation are computed so
   * that the action-dependent reward terms and the `consecutiveSameActions`
   * feature describe the action taken in this step, not the previous one.
   *
   * @param {*} action The action chosen for this step.
   * @returns {Array} `[newState, reward, done]`.
   */
  step(action) {
    this.numberOfSteps += 1;

    if (this.robot.lastAction === action) {
      this.robot.consecutiveSameActions += 1;
    } else {
      this.robot.consecutiveSameActions = 0; // Reset if action changes
    }
    this.robot.lastAction = action;

    const reward = this.calculateReward(this.trainingAim);
    const done = this.checkIfDone();
    const newState = this.getRobotState();

    return [newState, reward, done];
  }

  /**
   * Builds the observation vector: distance, total distance, repeat counter,
   * followed by the flattened object list. The result is zero-padded or
   * truncated to `ppoConfig.inputShape[0]` entries.
   *
   * @returns {number[]} Observation vector.
   */
  getRobotState() {
    const stateSize = ppoConfig.inputShape[0];
    const state = [
      this.robot.distance || 0,
      this.robot.totalDistance || 0,
      this.robot.consecutiveSameActions || 0,
      ...this.flattenObjects(this.robot.objects)
    ];

    // Force the vector to exactly the configured input size.
    while (state.length < stateSize) state.push(0);
    if (state.length > stateSize) state.length = stateSize;

    return state;
  }

  /**
   * Encodes up to 10 detected objects as `[x, y, isRobot]` triples, padding
   * with zero triples so the result always has 30 entries. Missing or empty
   * entries (and a non-array argument) are encoded as zero triples instead of
   * throwing.
   *
   * @param {Array} objects Detected objects.
   * @returns {number[]} Flat array of 30 numbers.
   */
  flattenObjects(objects) {
    const MAX_OBJECTS = 10;
    const detected = Array.isArray(objects) ? objects : [];
    const flattened = [];

    for (let i = 0; i < MAX_OBJECTS; i++) {
      const obj = detected[i];
      if (obj) {
        flattened.push(obj.x || 0, obj.y || 0, obj.type === 'robot' ? 1 : 0);
      } else {
        flattened.push(0, 0, 0);
      }
    }

    return flattened;
  }

  /**
   * Dispatches to the reward function for the given aim. Unknown aims are
   * logged and score 0; a non-finite result is logged and replaced by 0.
   *
   * @param {string} trainingAim 'disperse', 'gather' or 'explore'.
   * @returns {number} Finite reward.
   */
  calculateReward(trainingAim) {
    let reward = 0;
    switch (trainingAim) {
      case 'disperse':
        reward = this.calculateDisperseReward();
        break;
      case 'gather':
        reward = this.calculateGatherReward();
        break;
      case 'explore':
        reward = this.calculateExploreReward();
        break;
      default:
        console.error('Unknown training aim:', trainingAim);
    }

    // Number.isFinite rejects NaN and +/-Infinity without coercing the value.
    if (!Number.isFinite(reward)) {
      console.warn('Invalid reward calculated:', reward);
      reward = 0;
    }

    return reward;
  }

  /**
   * Reward for the 'disperse' aim.
   *
   * @returns {number} Reward (may be NaN on malformed data; callers going
   *   through calculateReward get that replaced by 0).
   */
  calculateDisperseReward() {
    let reward = 0;
    const {
      objects = [],
      distance = 0,
      totalDistance = 0,
      consecutiveSameActions = 0
    } = this.robot;

    if (Array.isArray(objects)) {
      // Reward for having few robots in the detected-object list
      const nearbyRobots = objects.filter(obj => obj.type === 'robot');
      reward += Math.max(0, 5 - nearbyRobots.length) * 0.5;

      // Reward for being close to non-robot objects
      const targetObjects = objects.filter(obj => obj.type !== 'robot');
      targetObjects.forEach(obj => {
        const distanceToObject = Math.sqrt(this.safeSquare(obj.x) + this.safeSquare(obj.y));
        reward += Math.max(0, this.safeDiv(100 - distanceToObject, 100));
      });
    }

    // Reward for moving (exploration), capped at 1
    reward += Math.min(this.safeDiv(totalDistance, 100), 1);

    // Penalty when the robot's `distance` reading drops below 20
    if (distance < 20) {
      reward -= this.safeDiv(20 - distance, 20);
    }

    // Encourage turning actions
    if (this.robot.lastAction === 'turn-left' || this.robot.lastAction === 'turn-right') {
      reward += 0.1; // Small reward for turning
    }

    // Penalty for repeating the same action too many times
    if (consecutiveSameActions > 5) {
      reward -= 0.1 * (consecutiveSameActions - 5);
    }

    return reward;
  }

  /**
   * Reward for the 'gather' aim.
   *
   * @returns {number} Reward (may be NaN on malformed data; callers going
   *   through calculateReward get that replaced by 0).
   */
  calculateGatherReward() {
    let reward = 0;
    const {
      objects = [],
      distance = 0,
      totalDistance = 0,
      consecutiveSameActions = 0
    } = this.robot;

    if (Array.isArray(objects)) {
      // Reward for each robot in the detected-object list
      const nearbyRobots = objects.filter(obj => obj.type === 'robot');
      reward += nearbyRobots.length * 0.5;

      // Penalty when no robot is detected at all
      if (nearbyRobots.length === 0) {
        reward -= 1;
      }

      // Penalty for being too close to obstacles (non-robot objects)
      const nearbyObstacles = objects.filter(obj => obj.type !== 'robot');
      nearbyObstacles.forEach(obstacle => {
        const distanceToObstacle = Math.sqrt(this.safeSquare(obstacle.x) + this.safeSquare(obstacle.y));
        if (distanceToObstacle < 30) {
          reward -= this.safeDiv(30 - distanceToObstacle, 30);
        }
      });
    }

    // Reward for moving (exploration), capped at 1
    reward += Math.min(this.safeDiv(totalDistance, 100), 1);

    // Encourage turning actions
    if (this.robot.lastAction === 'turn-left' || this.robot.lastAction === 'turn-right') {
      reward += 0.1; // Small reward for turning
    }

    // Penalty when the robot's `distance` reading drops below 20
    if (distance < 20) {
      reward -= this.safeDiv(20 - distance, 20);
    }

    // Penalty for repeating the same action too many times
    if (consecutiveSameActions > 5) {
      reward -= 0.1 * (consecutiveSameActions - 5);
    }

    return reward;
  }

  /**
   * Reward for the 'explore' aim.
   *
   * @returns {number} Reward (may be NaN on malformed data; callers going
   *   through calculateReward get that replaced by 0).
   */
  calculateExploreReward() {
    let reward = 0;
    const {
      distance = 0,
      totalDistance = 0,
      consecutiveSameActions = 0
    } = this.robot;

    // Major reward for distance traveled, capped at 5
    reward += Math.min(this.safeDiv(totalDistance, 100), 5);

    // Penalty when the robot's `distance` reading drops below 20
    if (distance < 20) {
      reward -= this.safeDiv(20 - distance, 20);
    }

    // Encourage forward motion
    if (this.robot.lastAction === 'forward') {
      reward += 0.1; // Small reward for driving forward
    }

    // Small penalty for repeating the same action too many times to encourage exploration
    if (consecutiveSameActions > 5) {
      reward -= 0.05 * (consecutiveSameActions - 5);
    }

    return reward;
  }

  /**
   * Division that yields 0 instead of +/-Infinity or NaN when the divisor is 0.
   *
   * @param {number} a Dividend.
   * @param {number} b Divisor.
   * @returns {number} `a / b`, or 0 when `b` is 0.
   */
  safeDiv(a, b) {
    return b !== 0 ? a / b : 0;
  }

  /**
   * Squares a value, treating anything that does not convert to a finite
   * number (undefined, NaN, Infinity, non-numeric strings) as 0.
   *
   * @param {*} x Value to square.
   * @returns {number} `x * x`, or 0.
   */
  safeSquare(x) {
    const value = Number(x);
    return Number.isFinite(value) ? value * value : 0;
  }

  /**
   * Computes a per-step "accuracy" score for the given aim.
   *
   * - 'explore': 100 once totalDistance exceeds 1000, else 0.
   * - 'gather' / 'disperse': `100 - min(distance, 100)` when an object of the
   *   aim's preferred kind is detected (non-robot for gather, robot for
   *   disperse); otherwise -50 if only the other kind is detected and
   *   distance < 40, -25 if distance < 40, and 0 in all remaining cases.
   * - any other aim: 0.
   *
   * NOTE(review): the preferred kind here (non-robot objects for 'gather') is
   * the opposite of what calculateGatherReward rewards — confirm intent.
   *
   * @param {string} trainingAim 'disperse', 'gather' or 'explore'.
   * @returns {number} Accuracy score.
   */
  calculateStepPerformance(trainingAim) {
    const { objects, distance, totalDistance } = this.robot;

    if (trainingAim === 'explore') {
      return totalDistance > 1000 ? 100 : 0;
    }
    if (trainingAim !== 'gather' && trainingAim !== 'disperse') {
      return 0;
    }

    const nearbyObjects = objects.filter(obj => obj.type !== 'robot');
    const nearbyRobots = objects.filter(obj => obj.type === 'robot');
    const isGather = trainingAim === 'gather';
    const preferred = isGather ? nearbyObjects : nearbyRobots;
    const other = isGather ? nearbyRobots : nearbyObjects;

    if (preferred.length > 0) {
      // Higher accuracy the smaller the distance reading is
      return 100 - Math.min(distance, 100);
    }
    if (other.length > 0 && distance < 40) {
      return -50;
    }
    if (distance < 40) {
      return -25;
    }
    return 0;
  }

  /**
   * Decides whether the current episode is over. Every aim ends once more
   * than 100 steps have been counted; each known aim additionally has its own
   * success condition (see the cases below) and logs both flags on every call.
   *
   * @returns {boolean} True when the episode should end.
   */
  checkIfDone() {
    const maxSteps = 100;
    const { objects = [], totalDistance = 0, distance = 300 } = this.robot;
    const outOfSteps = this.numberOfSteps > maxSteps;

    switch (this.trainingAim) {
      case 'gather': {
        // Done when a robot is detected while the distance reading is below 40, or max steps reached
        const nearRobot = objects.some(obj => obj.type === 'robot' && distance < 40);
        console.log('Gather is done for this episode using robotID:', this.robotId, nearRobot, outOfSteps);
        return nearRobot || outOfSteps;
      }

      case 'disperse': {
        // Done when a non-robot object is detected while the distance reading is below 40, or max steps reached
        const nearObject = objects.some(obj => obj.type !== 'robot' && distance < 40);
        console.log('Disperse is done for this episode using robotID:', this.robotId, nearObject, outOfSteps);
        return nearObject || outOfSteps;
      }

      case 'explore': {
        // Done when the total distance exceeds 1000, or max steps reached
        const farEnough = totalDistance > 1000;
        console.log('Explore is done for this episode using robotID:', this.robotId, farEnough, outOfSteps);
        return farEnough || outOfSteps;
      }

      default:
        // Default to ending after max steps
        return outOfSteps;
    }
  }

  /**
   * Starts a new episode: clears the robot record and restarts the step
   * counter at its constructor value. Without restarting the counter, every
   * episode after the first one to hit the step cap would be reported as done
   * on its very first step.
   *
   * @returns {number[]} Initial observation vector.
   */
  reset() {
    this.robot = this.initializeRobot({});
    this.numberOfSteps = 1;
    return this.getRobotState();
  }

  /**
   * Switches the training aim. Invalid values are logged and ignored.
   *
   * @param {string} aim 'disperse', 'gather' or 'explore'.
   */
  setTrainingAim(aim) {
    if (['disperse', 'gather', 'explore'].includes(aim)) {
      this.trainingAim = aim;
    } else {
      console.error('Invalid training aim:', aim);
    }
  }
}