Best Python code snippet using refurb_python
text_prompt.py
Source:text_prompt.py
from typing import List, Tuple

import clip
import numpy as np
import torch
from torch import nn

from .utils import CLIP


class PromptExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        self._buffer_init = False
        self.with_trainable_params = False

    def init_buffer(self, clip_model):
        self._buffer_init = True

    def forward(self, noun_list: List[str], clip_model: nn.Module):
        raise NotImplementedError()


class PredefinedPromptExtractor(PromptExtractor):
    def __init__(self, templates: List[str]):
        super().__init__()
        self.templates = templates

    def forward(self, noun_list: List[str], clip_model: nn.Module):
        text_features_bucket = []
        for template in self.templates:
            noun_tokens = [clip.tokenize(template.format(noun)) for noun in noun_list]
            text_inputs = torch.cat(noun_tokens).to(
                clip_model.text_projection.data.device
            )
            text_features = clip_model.encode_text(text_inputs)
            text_features /= text_features.norm(dim=-1, keepdim=True)
            text_features_bucket.append(text_features)
            del text_inputs
        # ensemble by averaging
        text_features = torch.stack(text_features_bucket).mean(dim=0)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        return text_features


class ImageNetPromptExtractor(PredefinedPromptExtractor):
    def __init__(self):
        super().__init__(CLIP.IMAGENET_PROMPT)


class VILDPromptExtractor(PredefinedPromptExtractor):
    def __init__(self):
        super().__init__(CLIP.VILD_PROMPT)


class LearnablePromptExtractor(PromptExtractor):
    def __init__(self, prompt_dim: int, prompt_shape: Tuple[int, int]):
        super().__init__()
        assert len(prompt_shape) == 2, "prompt_shape must be a tuple of length 2"
        self.prompt_dim = prompt_dim
        self.prompt_shape = prompt_shape
        self.prefix_prompt = self._init_prompt(self.n_prefix)
        self.suffix_prompt = self._init_prompt(self.n_suffix)
        self._buffer_init = False
        self.with_trainable_params = True

    def _init_prompt(self, length):
        if length == 0:
            return None
        prompt_tensor = torch.empty(length, self.prompt_dim)
        nn.init.normal_(prompt_tensor, std=0.02)
        return nn.Parameter(prompt_tensor)

    def init_buffer(self, clip_model):
        sentence = "X."
        prompt = clip.tokenize(sentence)
        with torch.no_grad():
            embedding = clip_model.token_embedding(prompt).type(
                clip_model.dtype
            )  # 1,77,512
        # "X." tokenizes to [SOT, X, ., EOT, pad, ...]; slice out the four signals
        self.register_buffer("start_signal", embedding[0, :1, :])  # 1,512
        self.register_buffer("dot_signal", embedding[0, 2:3, :])  # 1,512
        self.register_buffer("end_signal", embedding[0, 3:4, :])  # 1,512
        self.register_buffer("pad_signal", embedding[0, 4:5, :])  # 1,512
        self.noun_bucket = {}
        self._buffer_init = True

    def forward(self, noun_list: List[str], clip_model: nn.Module):
        if not self._buffer_init:
            raise RuntimeError(
                f"Buffer of {self.__class__.__name__} is not initialized"
            )
        self._update_noun_features(noun_list, clip_model)
        prefix = [self.start_signal]
        if self.prefix_prompt is not None:
            prefix.append(self.prefix_prompt)
        prefix = torch.cat(prefix)
        suffix = [self.dot_signal, self.end_signal]
        if self.suffix_prompt is not None:
            suffix.insert(0, self.suffix_prompt)
        suffix = torch.cat(suffix)
        lengths = [
            len(prefix) + len(suffix) + len(self.noun_bucket[noun])
            for noun in noun_list
        ]
        embeddings = torch.stack(
            [
                torch.cat(
                    [prefix, self.noun_bucket[noun], suffix]
                    + [self.pad_signal.expand(77 - length, -1)]
                )
                for noun, length in zip(noun_list, lengths)
            ]
        )  # cls,77,512
        indices = torch.Tensor(lengths).long().to(embeddings.device) - 1
        text_features = self.get_text_feature(embeddings, indices, clip_model)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        return text_features

    def _update_noun_features(self, noun_list, clip_model):
        # only embed nouns that are not in the bucket yet
        left_class_names = [noun for noun in noun_list if noun not in self.noun_bucket]
        if len(left_class_names) > 0:
            with torch.no_grad():
                # return_length is an extension over stock CLIP's tokenize
                tokens, name_lengths = clip.tokenize(
                    left_class_names, return_length=True
                )
                name_lengths = [
                    n - 2 for n in name_lengths
                ]  # remove start and end token
                text_embeddings = clip_model.token_embedding(
                    tokens.to(self.device)
                ).type(clip_model.dtype)
                text_embeddings = [
                    embedding[1 : 1 + length]
                    for embedding, length in zip(text_embeddings, name_lengths)
                ]
            self.noun_bucket.update(
                {
                    name: embedding
                    for name, embedding in zip(left_class_names, text_embeddings)
                }
            )

    @staticmethod
    def get_text_feature(x, indices, clip_model):
        x = x + clip_model.positional_embedding.type(clip_model.dtype)
        x = x.permute(1, 0, 2)  # NLD -> LND
        x = clip_model.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = clip_model.ln_final(x).type(clip_model.dtype)
        # take features from the eot embedding (the eot token sits at the
        # per-sequence position passed in via `indices`)
        x = x[torch.arange(x.shape[0]), indices] @ clip_model.text_projection
        return x

    @property
    def n_prefix(self):
        return self.prompt_shape[0]

    @property
    def n_suffix(self):
        return self.prompt_shape[1]

    @property
    def device(self):
        return self.start_signal.device

    def extra_repr(self) -> str:
        r"""Set the extra representation of the module.

        To print customized extra information, you should re-implement
        this method in your own modules. Both single-line and multi-line
        strings are acceptable.
        """
        repr = (
            f"prefix_prompt:{self.n_prefix},suffix_prompt:{self.n_suffix},"
            f"dimension:{self.prompt_dim}\n"
        )
        repr = repr + "[Normal_Init(mu=0,std=0.02)]"
        return repr


class InstanceLearnablePromptExtractor(LearnablePromptExtractor):
    def __init__(
        self,
        prompt_dim: int,
        prompt_shape: Tuple[int, int],
        img_feat_dim: int,
        instance_weight,
    ):
        super().__init__(prompt_dim, prompt_shape)
        # intermediate_dim = int(np.sqrt(img_feat_dim * self.prompt_dim))
        intermediate_dim = 4 * self.prompt_dim
        self.intermediate_dim = intermediate_dim
        self.img_tokenizer = nn.Sequential(
            nn.Linear(img_feat_dim, intermediate_dim),
            nn.ReLU(),
            nn.Linear(intermediate_dim, self.prompt_dim),
            nn.ReLU(),
        )
        self.instance_weight = instance_weight

    def forward(self, noun_list: List[str], clip_model: nn.Module, img_feat):
        if not self._buffer_init:
            raise RuntimeError(
                f"Buffer of {self.__class__.__name__} is not initialized"
            )
        self._update_noun_features(noun_list, clip_model)
        prefix = [self.start_signal]
        if self.prefix_prompt is not None:
            prefix.append(self.prefix_prompt)
        prefix = torch.cat(prefix)
        suffix = [self.dot_signal, self.end_signal]
        if self.suffix_prompt is not None:
            suffix.insert(0, self.suffix_prompt)
        suffix = torch.cat(suffix)
        lengths = [
            len(prefix) + len(suffix) + len(self.noun_bucket[noun])
            for noun in noun_list
        ]
        # project the image feature into prompt space and mix it into every token
        img_tokens = self.img_tokenizer(img_feat)
        # prefix += img_tokens
        # suffix += img_tokens
        embeddings = torch.stack(
            [
                torch.cat(
                    [prefix, self.noun_bucket[noun], suffix]
                    + [self.pad_signal.expand(77 - length, -1)]
                )
                + self.instance_weight * img_tokens
                for noun, length in zip(noun_list, lengths)
            ]
        )  # cls,77,512
        indices = torch.Tensor(lengths).long().to(embeddings.device) - 1
        text_features = self.get_text_feature(embeddings, indices, clip_model)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        return text_features
utils.py
Source:utils.py
from typing import List, Union

import streamlit as st
from transformers.models.gpt_neo.modeling_gpt_neo import GPTNeoPreTrainedModel

from ..utils import compute_attn_logits


def sample_text(model, col_idx, key):
    text = st.session_state[key]
    if st.session_state.prefix_prompt is not None and len(st.session_state.prefix_prompt) > 0:
        text = st.session_state.prefix_prompt + '\n' + text
    model_inputs = st.session_state.tokenizer.encode(text, return_tensors='pt').to(st.session_state.device)
    output = model.model.generate(
        model_inputs, **st.session_state.sample_kwargs, min_length=0, output_attentions=True
    )
    output_text = st.session_state.tokenizer.decode(output[0], skip_special_tokens=True)
    if st.session_state.prefix_prompt is not None and len(st.session_state.prefix_prompt) > 0:
        # removeprefix, not lstrip: lstrip(s) strips any characters in s, not the prefix string
        output_text = output_text.removeprefix(st.session_state.prefix_prompt + '\n')

    st.session_state["storage"][col_idx] = output_text
    text_change(col_idx=col_idx)


def on_text_change(col_idx: Union[int, List[int]], text_key):
    if isinstance(col_idx, list):
        for idx in col_idx:
            on_text_change(idx, text_key)
    else:
        st.session_state["storage"][col_idx] = st.session_state[text_key]
        text_change(col_idx)


def get_attn_logits_args():
    # get args for compute_attn_logits
    if st.session_state.model_name in st.session_state.registered_model_names:
        attn_name = st.session_state.config['attn_name']
        output_idx = st.session_state.config['output_idx']
        layer_key_prefix = st.session_state.config['layer_key_prefix']
        out_proj_name = st.session_state.config['out_proj_name']
        attn_suffix = st.session_state.config['attn_suffix']
        unembedding_key = st.session_state.config['unembedding_key']
    elif isinstance(st.session_state.model.model, GPTNeoPreTrainedModel):
        attn_name = 'attn'
        output_idx = 2
        layer_key_prefix = 'transformer->h'
        out_proj_name = 'out_proj'
        attn_suffix = 'attention'
        unembedding_key = 'lm_head'
    else:
        attn_name = 'attn'
        output_idx = 2
        layer_key_prefix = 'transformer->h'
        out_proj_name = 'c_proj'
        attn_suffix = None
        unembedding_key = 'lm_head'
    return attn_name, output_idx, layer_key_prefix, out_proj_name, attn_suffix, unembedding_key


def text_change(col_idx: Union[int, List[int]]):
    if isinstance(col_idx, list):
        for idx in col_idx:
            text_change(idx)
        return

    text = st.session_state["storage"][col_idx]
    if st.session_state.prefix_prompt is not None and len(st.session_state.prefix_prompt) > 0:
        text = st.session_state.prefix_prompt + '\n' + text
    if text is None or len(text) == 0:
        return

    attn_name, output_idx, layer_key_prefix, out_proj_name, attn_suffix, unembedding_key = get_attn_logits_args()

    layer = st.session_state['layer'] if 'layer' in st.session_state else None

    compute_attn_logits(
        st.session_state.model,
        st.session_state.model_name,
        st.session_state.tokenizer,
        st.session_state.num_layers,
        text,
        st.session_state.visualization[f'col_{col_idx}'],
        attn_name=attn_name,
        output_idx=output_idx,
        layer_key_prefix=layer_key_prefix,
        out_proj_name=out_proj_name,
        attn_suffix=attn_suffix,
        unembedding_key=unembedding_key,
        layer_id=layer,
        ...
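The per-architecture dispatch in get_attn_logits_args is easiest to see outside of Streamlit. A minimal sketch covering only the GPT-Neo and GPT-2-style fallback branches; the AttnArgs container and the resolve_attn_args name are illustrative, not part of the source:

from typing import NamedTuple, Optional

from transformers import GPT2Config, GPT2LMHeadModel
from transformers.models.gpt_neo.modeling_gpt_neo import GPTNeoPreTrainedModel


class AttnArgs(NamedTuple):
    attn_name: str
    output_idx: int
    layer_key_prefix: str
    out_proj_name: str
    attn_suffix: Optional[str]
    unembedding_key: str


def resolve_attn_args(model) -> AttnArgs:
    # GPT-Neo nests attention as transformer.h[i].attn.attention.out_proj,
    # while GPT-2-style models expose transformer.h[i].attn.c_proj directly
    if isinstance(model, GPTNeoPreTrainedModel):
        return AttnArgs("attn", 2, "transformer->h", "out_proj", "attention", "lm_head")
    return AttnArgs("attn", 2, "transformer->h", "c_proj", None, "lm_head")


# tiny randomly initialized GPT-2 just to exercise the fallback branch
model = GPT2LMHeadModel(GPT2Config(n_layer=2, n_head=2, n_embd=64))
print(resolve_attn_args(model))  # GPT-2 path: out_proj_name='c_proj', attn_suffix=None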
data.py
Source:data.py
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
from torch.utils.data import DataLoader

from helper import print_stage
from config import BATCH_SIZE

print_stage("Loading Data")


def load_emotion_dataset():
    return load_dataset("emotion")


dataset = load_emotion_dataset()
train_set = dataset["train"]
# dev_set = dataset["validation"]
dev_set = Dataset.from_dict(dataset["validation"][:5])  # tiny dev split for quick runs
test_set = dataset["test"]
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def prepare_tokenized_dataset(prefix_prompt="", suffix_prompt=""):
    def tokenize(examples):
        prompted_text = [
            f"{prefix_prompt} {text} {suffix_prompt}"
            for text in examples["text"]
        ]
        tokenized_inputs = tokenizer(
            prompted_text, truncation=True, padding=True
        )
        tokenized_inputs["labels"] = examples["label"]
        return tokenized_inputs

    return tokenize


def get_data_loader(split="train", prefix_prompt="", suffix_prompt=""):
    assert split in ["train", "dev", "test"]
    dataset_map = {
        "train": train_set,
        "dev": dev_set,
        "test": test_set,
    }
    tokenized_set = dataset_map[split].map(
        prepare_tokenized_dataset(
            prefix_prompt=prefix_prompt, suffix_prompt=suffix_prompt
        ),
        batched=True,
        batch_size=BATCH_SIZE,
    )
    tokenized_set = tokenized_set.remove_columns(["text", "label"])
    data_loader = DataLoader(
        tokenized_set, batch_size=BATCH_SIZE, collate_fn=data_collator
    )
    return data_loader


if __name__ == "__main__":
    # print(dataset["validation"][:5])
    # get_data_loader takes prefix_prompt/suffix_prompt, not a single `prompt` kwarg
    dev_loader = get_data_loader("dev", prefix_prompt="emotion:", suffix_prompt="[MASK]")
    for batch in dev_loader:
        print(batch)
        ...
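For reference, the closure returned by prepare_tokenized_dataset can be exercised directly. A short sketch, assuming the module-level tokenizer above and noting that importing data downloads the emotion dataset at import time; the prompt strings are illustrative:

from data import prepare_tokenized_dataset

tokenize = prepare_tokenized_dataset(prefix_prompt="emotion:", suffix_prompt="[MASK]")
out = tokenize({"text": ["i feel great today"], "label": [1]})
print(out["labels"])             # [1]
print(len(out["input_ids"][0]))  # token count after truncation/padding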