# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| 14 | +""" |
| 15 | +Copied from Dino repo. https://github.com/facebookresearch/dino |
| 16 | +Mostly copy-paste from timm library. |
| 17 | +https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py |
| 18 | +""" |
import math
import warnings
from functools import partial

import torch
import torch.nn as nn


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    # Cut & paste from PyTorch official master until it's in a few official releases - RW
    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    # type: (Tensor, float, float, float, float) -> Tensor
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
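
# Note: recent PyTorch releases expose an equivalent initializer as
# torch.nn.init.trunc_normal_; this local copy is kept for compatibility
# with the original DINO code.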


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample, applied in the main path of residual blocks."""
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    output = x.div(keep_prob) * random_tensor  # rescale by 1/keep_prob so the expected value is unchanged
    return output


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        # (B, N, 3*C) -> (3, B, num_heads, N, head_dim)
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Scaled dot-product attention map: (B, num_heads, N, N)
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        # Weighted sum of values, heads merged back to (B, N, C)
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x, attn


class Block(nn.Module):
    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

    def forward(self, x, return_attention=False):
        y, attn = self.attn(self.norm1(x))
        if return_attention:
            return attn
        x = x + self.drop_path(y)
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class PatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        num_patches = (img_size // patch_size) * (img_size // patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        B, C, H, W = x.shape
        # (B, C, H, W) -> (B, num_patches, embed_dim)
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x


class VisionTransformer(nn.Module):
    """ Vision Transformer """
    def __init__(self, img_size=[224], patch_size=16, in_chans=3, num_classes=0, embed_dim=768, depth=12,
                 num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
                 drop_path_rate=0., norm_layer=nn.LayerNorm, **kwargs):
        super().__init__()
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = PatchEmbed(
            img_size=img_size[0], patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule
        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer)
            for i in range(depth)])
        self.norm = norm_layer(embed_dim)

        # Classifier head
        self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def interpolate_pos_encoding(self, x, w, h):
        npatch = x.shape[1] - 1
        N = self.pos_embed.shape[1] - 1
        if npatch == N and w == h:
            return self.pos_embed
        class_pos_embed = self.pos_embed[:, 0]
        patch_pos_embed = self.pos_embed[:, 1:]
        dim = x.shape[-1]
        w0 = w // self.patch_embed.patch_size
        h0 = h // self.patch_embed.patch_size
        # we add a small number to avoid floating point error in the interpolation
        # see discussion at https://github.com/facebookresearch/dino/issues/8
        w0, h0 = w0 + 0.1, h0 + 0.1
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed.reshape(1, int(math.sqrt(N)), int(math.sqrt(N)), dim).permute(0, 3, 1, 2),
            scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)),
            mode='bicubic',
        )
        assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1]
        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
        return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)
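
    # Worked example (sketch): with patch_size=16 and a model pretrained at
    # 224x224 (a 14x14 grid, N=196 patch tokens), a 480x480 input gives
    # w0 = h0 = 30, so the 14x14 positional grid is bicubic-resized to 30x30
    # and flattened back into 900 patch-position embeddings.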

    def prepare_tokens(self, x):
        B, nc, w, h = x.shape
        x = self.patch_embed(x)  # patch linear embedding

        # add the [CLS] token to the embed patch tokens
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # add positional encoding to each token
        x = x + self.interpolate_pos_encoding(x, w, h)

        return self.pos_drop(x)

    def forward(self, x):
        x = self.prepare_tokens(x)
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)
        return x[:, 0]

    def get_last_selfattention(self, x):
        x = self.prepare_tokens(x)
        for i, blk in enumerate(self.blocks):
            if i < len(self.blocks) - 1:
                x = blk(x)
            else:
                # return attention of the last block
                return blk(x, return_attention=True)

    def get_intermediate_layers(self, x, n=1):
        x = self.prepare_tokens(x)
        # we return the output tokens from the `n` last blocks
        output = []
        for i, blk in enumerate(self.blocks):
            x = blk(x)
            if len(self.blocks) - i <= n:
                output.append(self.norm(x))
        return output


def vit_small(patch_size=16, **kwargs):
    model = VisionTransformer(
        patch_size=patch_size, embed_dim=384, depth=12, num_heads=6, mlp_ratio=4,
        qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    return model


def vit_base(patch_size=16, **kwargs):
    model = VisionTransformer(
        patch_size=patch_size, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4,
        qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
    return model

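
# Example (sketch): building a DINO-pretrained ViT-S/16 backbone and extracting
# the [CLS] embedding for a batch. The checkpoint URL below is assumed to be the
# publicly released DINO ViT-S/16 weights; adjust it if your copy lives elsewhere.
#
#   model = vit_small(patch_size=16, num_classes=0)
#   state_dict = torch.hub.load_state_dict_from_url(
#       "https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth")
#   model.load_state_dict(state_dict, strict=True)
#   cls_feat = model(torch.randn(2, 3, 224, 224))  # -> (2, 384) [CLS] embeddings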


class ViTFeat(nn.Module):
    """DINO ViT feature extractor returning q/k/v patch features from the last block."""
    def __init__(self, pretrained_pth, feat_dim, vit_arch='base', vit_feat='k', patch_size=16):
        super().__init__()
        if vit_arch == 'base':
            self.model = vit_base(patch_size=patch_size, num_classes=0)
        else:
            self.model = vit_small(patch_size=patch_size, num_classes=0)

        self.feat_dim = feat_dim
        self.vit_feat = vit_feat
        self.patch_size = patch_size

        # state_dict = torch.load(pretrained_pth, map_location="cpu")
        # `pretrained_pth` is a checkpoint path relative to https://dl.fbaipublicfiles.com
        state_dict = torch.hub.load_state_dict_from_url("https://dl.fbaipublicfiles.com" + pretrained_pth)
        self.model.load_state_dict(state_dict, strict=True)
        print('Loading weights from {}'.format(pretrained_pth))

    def forward(self, img):
        feat_out = {}

        def hook_fn_forward_qkv(module, input, output):
            # Capture the raw qkv projection of the last block's attention layer.
            feat_out["qkv"] = output

        # Register the hook for this forward pass only; keep the handle so the hook
        # can be removed afterwards and does not accumulate across calls.
        handle = self.model.blocks[-1].attn.qkv.register_forward_hook(hook_fn_forward_qkv)

        # Forward pass through the model
        with torch.no_grad():
            h, w = img.shape[2], img.shape[3]
            feat_h, feat_w = h // self.patch_size, w // self.patch_size
            attentions = self.model.get_last_selfattention(img)
            bs, nb_head, nb_token = attentions.shape[0], attentions.shape[1], attentions.shape[2]
            qkv = (
                feat_out["qkv"]
                .reshape(bs, nb_token, 3, nb_head, -1)
                .permute(2, 0, 3, 1, 4)
            )  # (3, bs, nb_head, nb_token, head_dim)
            q, k, v = qkv[0], qkv[1], qkv[2]

            # Merge heads back: (bs, nb_token, embed_dim)
            k = k.transpose(1, 2).reshape(bs, nb_token, -1)
            q = q.transpose(1, 2).reshape(bs, nb_token, -1)
            v = v.transpose(1, 2).reshape(bs, nb_token, -1)

            # Modality selection: drop the [CLS] token and reshape the patch tokens
            # to (bs, feat_dim, feat_h * feat_w)
            if self.vit_feat == "k":
                feats = k[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
            elif self.vit_feat == "q":
                feats = q[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
            elif self.vit_feat == "v":
                feats = v[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
            elif self.vit_feat == "kqv":
                k = k[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
                q = q[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
                v = v[:, 1:].transpose(1, 2).reshape(bs, self.feat_dim, feat_h * feat_w)
                feats = torch.cat([k, q, v], dim=1)

        handle.remove()
        return feats
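
    # The returned tensor has shape (B, feat_dim, feat_h * feat_w) -- or
    # (B, 3 * feat_dim, feat_h * feat_w) when vit_feat == "kqv" -- where
    # feat_h = H // patch_size and feat_w = W // patch_size.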


if __name__ == "__main__":
    vit_arch = 'base'
    vit_feat = 'k'

    # NOTE: illustrative smoke test; the checkpoint path below is assumed to be the
    # official DINO ViT-B/16 weights hosted under https://dl.fbaipublicfiles.com,
    # and feat_dim=768 matches the ViT-B embedding dimension.
    pretrained_pth = "/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pth"
    model = ViTFeat(pretrained_pth, feat_dim=768, vit_arch=vit_arch, vit_feat=vit_feat, patch_size=16)
    model.cuda()
    img = torch.randn(4, 3, 224, 224, device="cuda")
    # Forward pass through the model
    feat = model(img)
    print(feat.shape)