# Encode every training image with OpenAI CLIP ViT-L/14 and save the normalized
# image embeddings together with their average ratings as .npy training data.
import clip
import numpy as np
import pandas as pd
import torch
from PIL import Image

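# `clip` here is OpenAI's CLIP package (https://github.com/openai/CLIP), which
# provides the pretrained model together with its matching preprocessing transform.
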
def normalized(a, axis=-1, order=2):
    """L2-normalize `a` along `axis`, leaving all-zero vectors untouched."""
    l2 = np.atleast_1d(np.linalg.norm(a, order, axis))
    l2[l2 == 0] = 1
    return a / np.expand_dims(l2, axis)

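# Example (illustrative, not used below): `normalized` maps any non-zero row to
# unit L2 norm, e.g.
#   emb = np.random.randn(1, 768)
#   np.linalg.norm(normalized(emb), axis=-1)  # -> approximately array([1.])
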
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
model, preprocess = clip.load("ViT-L/14", device=device) |
|
|
|
|
|
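# CLIP ViT-L/14 image features are 768-dimensional, so each embedding appended
# below has shape (1, 768) and the stacked training matrix ends up as (N, 768).
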
f = "trainingdata.parquet" |
|
df = pd.read_parquet(f) |
|
|
|
|
|
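# The parquet file is expected to provide an AVERAGE_RATING column (numeric rating
# per image) and an IMAGEPATH column (path to the image file); these are the only
# fields read in the loop below.
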
x = []  # normalized CLIP embeddings, one (1, 768) array per image
y = []  # ratings, one (1, 1) array per image
c = 0   # number of images processed so far

for idx, row in df.iterrows():
    average_rating = float(row.AVERAGE_RATING)
    print(average_rating)

    # Skip images whose average rating is below 1.
    if average_rating < 1:
        continue

    img = row.IMAGEPATH
    print(img)

    # Skip images that cannot be opened or preprocessed.
    try:
        image = preprocess(Image.open(img)).unsqueeze(0).to(device)
    except Exception:
        continue

    # Extract the CLIP image embedding; no gradients are needed for inference.
    with torch.no_grad():
        image_features = model.encode_image(image)

    im_emb_arr = image_features.cpu().detach().numpy()
    x.append(normalized(im_emb_arr))

    y_ = np.zeros((1, 1))
    y_[0][0] = average_rating
    y.append(y_)

    print(c)
    c += 1

# Stack the per-image arrays into (N, 768) and (N, 1) training matrices.
x = np.vstack(x)
y = np.vstack(y)
print(x.shape)
print(y.shape)

np.save('x_OpenAI_CLIP_L14_embeddings.npy', x)
np.save('y_ratings.npy', y)
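
# A minimal sketch (illustrative) of how the saved arrays could be loaded back
# for training a rating regressor on top of the embeddings:
#   x = np.load('x_OpenAI_CLIP_L14_embeddings.npy')  # shape (N, 768)
#   y = np.load('y_ratings.npy')                     # shape (N, 1)
#   x_t = torch.from_numpy(x).float()
#   y_t = torch.from_numpy(y).float()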