# Encode every training image with OpenAI CLIP ViT-L/14 and save the normalized
# image embeddings together with their average ratings as .npy training data.
import clip
import numpy as np
import pandas as pd
import torch
from PIL import Image

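# `clip` here is OpenAI's CLIP package (https://github.com/openai/CLIP), which
# provides the pretrained model together with its matching preprocessing transform.
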
def normalized(a, axis=-1, order=2):
    """L2-normalize `a` along `axis`, leaving all-zero vectors untouched."""
    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
    l2[l2 == 0] = 1
    return a / np.expand_dims(l2, axis)

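# Example (illustrative, not used below): `normalized` maps any non-zero row to
# unit L2 norm, e.g.
#   emb = np.random.randn(1, 768)
#   np.linalg.norm(normalized(emb), axis=-1)  # -> approximately array([1.])
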
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
model, preprocess = clip.load("ViT-L/14", device=device) |
|
|
|
|
|
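# CLIP ViT-L/14 image features are 768-dimensional, so each embedding appended
# below has shape (1, 768) and the stacked training matrix ends up as (N, 768).
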
f = "trainingdata.parquet" |
|
df = pd.read_parquet(f) |
|
|
|
|
|
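# The parquet file is expected to provide an AVERAGE_RATING column (numeric rating
# per image) and an IMAGEPATH column (path to the image file); these are the only
# fields read in the loop below.
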
x = []  # normalized CLIP embeddings, one (1, 768) array per image
y = []  # ratings, one (1, 1) array per image
c = 0   # number of images processed so far

for idx, row in df.iterrows():
    average_rating = float(row.AVERAGE_RATING)
    print(average_rating)

    # Skip images whose average rating is below 1.
    if average_rating < 1:
        continue

    img = row.IMAGEPATH
    print(img)

    # Skip images that cannot be opened or preprocessed.
    try:
        image = preprocess(Image.open(img)).unsqueeze(0).to(device)
    except Exception:
        continue

    # Extract the CLIP image embedding; no gradients are needed for inference.
    with torch.no_grad():
        image_features = model.encode_image(image)

    im_emb_arr = image_features.cpu().detach().numpy()
    x.append(normalized(im_emb_arr))

    y_ = np.zeros((1, 1))
    y_[0][0] = average_rating
    y.append(y_)

    print(c)
    c += 1

# Stack the per-image arrays into (N, 768) and (N, 1) training matrices.
x = np.vstack(x)
y = np.vstack(y)
print(x.shape)
print(y.shape)

np.save('x_OpenAI_CLIP_L14_embeddings.npy', x)
np.save('y_ratings.npy', y)
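
# A minimal sketch (illustrative) of how the saved arrays could be loaded back
# for training a rating regressor on top of the embeddings:
#   x = np.load('x_OpenAI_CLIP_L14_embeddings.npy')  # shape (N, 768)
#   y = np.load('y_ratings.npy')                     # shape (N, 1)
#   x_t = torch.from_numpy(x).float()
#   y_t = torch.from_numpy(y).float()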