This repository has been archived by the owner on Apr 7, 2023. It is now read-only.

Commit

Updates for M1 mac
plord12 committed Dec 7, 2022
1 parent a6d09d8 commit bc5fc2f
Showing 6 changed files with 878 additions and 55 deletions.
13 changes: 12 additions & 1 deletion README.md
@@ -33,6 +33,10 @@ Common garden animals :
* Eastern Gray Squirrel :heavy_check_mark:
* Domestic Cat :heavy_check_mark:

Weeds :

* Common Dandelion :heavy_check_mark:

( :heavy_check_mark: indicates bounding boxes created )

## TensorFlow on mac :
@@ -51,13 +55,20 @@ Common garden animals :
* https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/eager_few_shot_od_training_tf2_colab.ipynb
* https://neptune.ai/blog/how-to-train-your-own-object-detector-using-tensorflow-object-detection-api

## To run :
## To run on mac intel :

./install-tensorflow.bash
. Tensorflow/tf2_api_env/bin/activate
./data-prep.py
./train.py

## To run on mac M1 :

./install-tensorflow-macos-m1.sh
conda activate tensorflow
./data-prep.py
./train2.py
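
A quick sanity check after activating the environment (a minimal sketch; it assumes the install script sets up the Apple-silicon tensorflow-macos build together with the tensorflow-metal plugin — adjust if your setup differs):

import tensorflow as tf
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))  # the Apple GPU should be listed if the Metal plugin is active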

## Directories used :

* Tensorflow - installed tensorflow binaries
36 changes: 36 additions & 0 deletions create_csv.py
@@ -0,0 +1,36 @@
# Script to create CSV data file from Pascal VOC annotation files
# Based off code from GitHub user datitran: https://github.com/datitran/raccoon_dataset/blob/master/xml_to_csv.py

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(float(member.find('bndbox').find('xmin').text)),
                     int(float(member.find('bndbox').find('ymin').text)),
                     int(float(member.find('bndbox').find('xmax').text)),
                     int(float(member.find('bndbox').find('ymax').text))
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

def main():
    for folder in ['train','validation']:
        image_path = os.path.join(os.getcwd(), ('images/' + folder))
        xml_df = xml_to_csv(image_path)
        xml_df.to_csv(('images/' + folder + '_labels.csv'), index=None)
        print('Successfully converted xml to csv.')

main()
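
For reference, a minimal sketch of what the conversion does with a single labelImg-style Pascal VOC annotation (the filename, image size and box values below are invented for illustration):

import xml.etree.ElementTree as ET

sample = """<annotation>
  <filename>IMG_0001.jpg</filename>
  <size><width>800</width><height>600</height><depth>3</depth></size>
  <object>
    <name>House_Sparrow</name>
    <bndbox><xmin>120</xmin><ymin>80</ymin><xmax>260</xmax><ymax>210</ymax></bndbox>
  </object>
</annotation>"""

root = ET.fromstring(sample)
member = root.find('object')
print((root.find('filename').text,                           # filename
       int(root.find('size')[0].text),                       # width
       int(root.find('size')[1].text),                       # height
       member[0].text,                                       # class
       int(float(member.find('bndbox').find('xmin').text)),  # xmin
       int(float(member.find('bndbox').find('ymin').text)),  # ymin
       int(float(member.find('bndbox').find('xmax').text)),  # xmax
       int(float(member.find('bndbox').find('ymax').text)))) # ymax
# -> ('IMG_0001.jpg', 800, 600, 'House_Sparrow', 120, 80, 260, 210)

Each such tuple becomes one row of images/train_labels.csv or images/validation_labels.csv, under the header written by pandas.
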
214 changes: 170 additions & 44 deletions data-prep.py
@@ -11,6 +11,7 @@
import re
import glob
from pathlib import Path
import random

#
# download and extract tgz
@@ -23,6 +24,17 @@ def download_extract_tgz(url, testfile):
        thetarfile = tarfile.open(fileobj=ftpstream, mode="r|gz")
        thetarfile.extractall()

#
# download and extract tar
#
def download_extract_tar(url, testfile):
    print ('Downloading ' + url)
    if not os.path.exists(testfile):
        print ("Downloading "+testfile)
        ftpstream = urllib.request.urlopen(url)
        thetarfile = tarfile.open(fileobj=ftpstream, mode="r|")
        thetarfile.extractall()
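
Both helpers stream the archive straight from the URL: tarfile mode "r|gz" reads a gzip-compressed tarball, while the new "r|" mode reads a plain uncompressed tar. A sketch of how the new helper is invoked, using the VOC archive that appears (commented out) further down in this file; the existence check on the second argument skips the download once 'VOCdevkit' has already been extracted:

download_extract_tar('http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 'VOCdevkit')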

#
# copy interesting image directories
#
@@ -32,34 +44,53 @@ def copy_images(source, destination):
# 10 most common UK birds
#
dirs = {}
dirs['House Sparrow'] = '04132_Animalia_Chordata_Aves_Passeriformes_Passeridae_Passer_diffusus'
dirs['Blue Tit'] = '04031_Animalia_Chordata_Aves_Passeriformes_Paridae_Cyanistes_caeruleus'
dirs['European Starling'] = '04202_Animalia_Chordata_Aves_Passeriformes_Sturnidae_Sturnus_vulgaris'
dirs['Eurasian Blackbird'] = '04284_Animalia_Chordata_Aves_Passeriformes_Turdidae_Turdus_merula'
dirs['Wood Pigeon'] = '03516_Animalia_Chordata_Aves_Columbiformes_Columbidae_Columba_palumbus'
dirs['European Robin'] = '03978_Animalia_Chordata_Aves_Passeriformes_Muscicapidae_Erithacus_rubecula'
dirs['Great Tit'] = '04034_Animalia_Chordata_Aves_Passeriformes_Paridae_Parus_major'
dirs['Eurasian Goldfinch'] = '03809_Animalia_Chordata_Aves_Passeriformes_Fringillidae_Carduelis_carduelis'
dirs['Eurasian Magpie'] = '03775_Animalia_Chordata_Aves_Passeriformes_Corvidae_Pica_pica'
dirs['Long-tailed Tit'] = '03688_Animalia_Chordata_Aves_Passeriformes_Aegithalidae_Aegithalos_caudatus'
dirs['House_Sparrow'] = '04132_Animalia_Chordata_Aves_Passeriformes_Passeridae_Passer_diffusus'
dirs['Blue_Tit'] = '04031_Animalia_Chordata_Aves_Passeriformes_Paridae_Cyanistes_caeruleus'
dirs['European_Starling'] = '04202_Animalia_Chordata_Aves_Passeriformes_Sturnidae_Sturnus_vulgaris'
dirs['Eurasian_Blackbird'] = '04284_Animalia_Chordata_Aves_Passeriformes_Turdidae_Turdus_merula'
dirs['Wood_Pigeon'] = '03516_Animalia_Chordata_Aves_Columbiformes_Columbidae_Columba_palumbus'
dirs['European_Robin'] = '03978_Animalia_Chordata_Aves_Passeriformes_Muscicapidae_Erithacus_rubecula'
dirs['Great_Tit'] = '04034_Animalia_Chordata_Aves_Passeriformes_Paridae_Parus_major'
dirs['Eurasian_Goldfinch'] = '03809_Animalia_Chordata_Aves_Passeriformes_Fringillidae_Carduelis_carduelis'
dirs['Eurasian_Magpie'] = '03775_Animalia_Chordata_Aves_Passeriformes_Corvidae_Pica_pica'
dirs['Long-tailed_Tit'] = '03688_Animalia_Chordata_Aves_Passeriformes_Aegithalidae_Aegithalos_caudatus'

# Collared Dove
# Greenfinch
# Bullfinch
# Wren
# Carrion Crow
# Jackdaw
# Jay
# Dunnock
# Chaffinch
# Goldcrest

#
# extra birds
#
dirs['Red Kite'] = '03172_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Milvus_milvus'
dirs['Grey Heron'] = '04356_Animalia_Chordata_Aves_Pelecaniformes_Ardeidae_Ardea_cinerea'
dirs['Red_Kite'] = '03172_Animalia_Chordata_Aves_Accipitriformes_Accipitridae_Milvus_milvus'
dirs['Grey_Heron'] = '04356_Animalia_Chordata_Aves_Pelecaniformes_Ardeidae_Ardea_cinerea'

#
# common garden animals
#
dirs['Common Hedgehog'] = '04745_Animalia_Chordata_Mammalia_Eulipotyphla_Erinaceidae_Erinaceus_europaeus'
dirs['Red Fox'] = '04677_Animalia_Chordata_Mammalia_Carnivora_Canidae_Vulpes_vulpes'
dirs['Eastern Gray Squirrel'] = '04844_Animalia_Chordata_Mammalia_Rodentia_Sciuridae_Sciurus_carolinensis'
dirs['Domestic Cat'] = '04679_Animalia_Chordata_Mammalia_Carnivora_Felidae_Felis_catus'
dirs['Common_Hedgehog'] = '04745_Animalia_Chordata_Mammalia_Eulipotyphla_Erinaceidae_Erinaceus_europaeus'
dirs['Red_Fox'] = '04677_Animalia_Chordata_Mammalia_Carnivora_Canidae_Vulpes_vulpes'
dirs['Eastern_Gray_Squirrel'] = '04844_Animalia_Chordata_Mammalia_Rodentia_Sciuridae_Sciurus_carolinensis'
dirs['Domestic_Cat'] = '04679_Animalia_Chordata_Mammalia_Carnivora_Felidae_Felis_catus'

#
# people
#
# from voc ... although dramatically decreases mAP
#
# dirs['Person'] = 'VOCdevkit'

#
# weeds
#
dirs['Common_Dandelion'] = '07039_Plantae_Tracheophyta_Magnoliopsida_Asterales_Asteraceae_Taraxacum_officinale'

os.makedirs(destination, exist_ok=True)
for name, directory in dirs.items():
@@ -76,7 +107,7 @@ def label_img(data_dir):
# copy to tmp dir first so we only edit necessary files
#
tmp_data_dir = Path('images/tmp')
with open('classes.txt', "w") as file_out:
with open('labelmap.txt', "w") as file_out:
for name in classes.keys():
print (name, file=file_out)
for thisdirpath, subdirs, files in os.walk(data_dir):
@@ -88,7 +119,7 @@ def label_img(data_dir):
shutil.copyfile(os.path.join(thisdirpath, file), os.path.join(tmp_data_dir, file))

if os.path.exists(tmp_data_dir):
os.system('labelImg '+str(tmp_data_dir)+' classes.txt')
os.system('labelImg '+str(tmp_data_dir)+' labelmap.txt')
for thisdirpath, subdirs, files in os.walk(tmp_data_dir):
for file in files:
if file.endswith(".xml"):
@@ -105,8 +136,7 @@ def label_img(data_dir):
#
# main parameters
#
training_data_dir = Path('images/training')
validation_data_dir = Path('images/validation')
all_data_dir = Path('images/all')
boundingbox_data_dir = Path('boundingbox')

#
@@ -119,23 +149,63 @@ def label_img(data_dir):
#
# extract images we are interested in
#
classes = copy_images('train', training_data_dir);
copy_images('val', validation_data_dir);
classes = copy_images('train', all_data_dir);
copy_images('val', all_data_dir);

#
# copy any bounding boxes in
#
print ('Copying bounding box xml data')
for filename in glob.glob(os.path.join(boundingbox_data_dir, 'training', '*.*')):
    shutil.copy(filename, training_data_dir)
for filename in glob.glob(os.path.join(boundingbox_data_dir, 'validation', '*.*')):
    shutil.copy(filename, validation_data_dir)
for filename in glob.glob(os.path.join(boundingbox_data_dir, '*.xml')):
    shutil.copy(filename, all_data_dir)

#
# use _ in name to fool later stats
#
for thisdirpath, subdirs, files in os.walk('images'):
    for file in files:
        if file.endswith(".xml"):
            source = os.path.join(thisdirpath, file)
            dest = os.path.join(thisdirpath, file+"new")
            with open(source,'r') as file_in:
                with open(dest, "w") as file_out:
                    for line in file_in:
                        newline = line
                        if '<name>' in newline:
                            newline = re.sub(' ', '_', newline)
                        print (newline, end = "", file=file_out)
            shutil.move(dest, source)
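
The substitution is applied only to lines of the annotation XML that contain a <name> tag, so multi-word class names end up matching the underscore-style keys in dirs above (presumably so that later tooling that splits on whitespace treats each class as a single token). A tiny illustration with a hypothetical annotation line:

import re

line = '<name>House Sparrow</name>\n'
print(re.sub(' ', '_', line), end='')   # prints: <name>House_Sparrow</name>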

#
# download voc ( for people )
#
# download_extract_tar('http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 'VOCdevkit')

#
# copy voc person-only bounding boxes in ( disabled )
#
# print ('Finding only person')
# pattern = re.compile(r'<name>([^<]*)</name>')
# i=0
# for filename in glob.glob('VOCdevkit/VOC2012/Annotations/*.xml'):
#     with open(filename) as xmlfile:
#         names = dict()
#         contents = xmlfile.read()
#         for (name) in re.findall(pattern, contents):
#             names[name] = 1
#         if len(names) == 1 and names.get('person') == 1 and i < 250:
#             image='VOCdevkit/VOC2012/JPEGImages/'+(Path(filename).stem)+'.jpg'
#             text_file = open(os.path.join(all_data_dir,Path(filename).name), "w")
#             n = text_file.write(contents.replace('<name>person</name>', '<name>Person</name>'))
#             text_file.close()
#             #shutil.copy(filename, all_data_dir)
#             shutil.copy(image, all_data_dir)
#             i=i+1

#
# run labelImg
#
label_img(training_data_dir)
label_img(validation_data_dir)
label_img(all_data_dir)

#
# backup xml (for any further data prep runs)
@@ -144,28 +214,84 @@ def label_img(data_dir):
    for file in files:
        if file.endswith(".xml"):
            source = os.path.join(thisdirpath, file)
            dest = os.path.join(boundingbox_data_dir, os.path.join(*(source.split(os.path.sep)[1:])))
            dest = os.path.join(boundingbox_data_dir, file)
            if not os.path.exists(dest):
                print('Backing up '+source+' to '+dest)
                os.makedirs(os.path.dirname(dest), exist_ok=True)
                shutil.copy(source, dest)

#
# create label map
#
count=1
with open('label_map.pbtxt', "w") as file_out:
    for name in classes.keys():
        print ('item {', file=file_out)
        print (' id: '+str(count), file=file_out)
        print (' name: \''+name+'\'', file=file_out)
        print ('}', file=file_out)
        count=count+1
os.system('mkdir -p images/train images/validation images/test')

# Define paths to image folders
image_path = 'images/all'
train_path = 'images/train'
val_path = 'images/validation'
test_path = 'images/test'

# Get list of all images
jpg_file_list = glob.glob(image_path + '/*.jpg')
JPG_file_list = glob.glob(image_path + '/*.JPG')
png_file_list = glob.glob(image_path + '/*.png')
bmp_file_list = glob.glob(image_path + '/*.bmp')

file_list = jpg_file_list + JPG_file_list + png_file_list + bmp_file_list
file_num = len(file_list)
print('Total images: %d' % file_num)

# Determine number of files to move to each folder
train_percent = 0.8 # 80% of the files go to train
val_percent = 0.1 # 10% go to validation
test_percent = 0.1 # 10% go to test
train_num = int(file_num*train_percent)
val_num = int(file_num*val_percent)
test_num = file_num - train_num - val_num
print('Files moving to train: %d' % train_num)
print('Files moving to validation: %d' % val_num)
print('Files moving to test: %d' % test_num)

# Select 80% of files randomly and move them to train folder
for i in range(train_num):
    move_me = random.choice(file_list)
    fn = move_me.split('/')[-1]
    base_fn = fn[:(len(fn)-4)] # Gets rid of .jpg, .png, or .bmp at end of the string
    xml_fn = base_fn + '.xml'
    os.rename(move_me, train_path+'/'+fn)
    os.rename(os.path.join(image_path,xml_fn),os.path.join(train_path,xml_fn))
    file_list.remove(move_me)

# Select 10% of remaining files and move them to validation folder
for i in range(val_num):
    move_me = random.choice(file_list)
    fn = move_me.split('/')[-1]
    base_fn = fn[:(len(fn)-4)] # Gets rid of .jpg, .png, or .bmp at end of the string
    xml_fn = base_fn + '.xml'
    os.rename(move_me, val_path+'/'+fn)
    os.rename(os.path.join(image_path,xml_fn),os.path.join(val_path,xml_fn))
    file_list.remove(move_me)

# Move remaining files to test folder
for i in range(test_num):
    file = file_list[i]
    fn = file.split('/')[-1]
    base_fn = fn[:(len(fn)-4)] # Gets rid of .jpg, .png, or .bmp at end of the string
    xml_fn = base_fn + '.xml'
    os.rename(file, test_path+'/'+fn)
    os.rename(os.path.join(image_path,xml_fn),os.path.join(test_path,xml_fn))
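
The three move loops above repeat the same filename handling; a minimal alternative sketch (a hypothetical helper, not part of this repository) using os.path.splitext, which works for any extension length instead of assuming a four-character suffix:

import os
import shutil

def move_image_with_xml(image_file, source_dir, dest_dir):
    # Move an image and its matching Pascal VOC .xml annotation into dest_dir.
    fn = os.path.basename(image_file)
    base_fn, _ = os.path.splitext(fn)
    shutil.move(image_file, os.path.join(dest_dir, fn))
    shutil.move(os.path.join(source_dir, base_fn + '.xml'),
                os.path.join(dest_dir, base_fn + '.xml'))

Seeding the module-level generator first (for example random.seed(0)) would also make the train/validation/test split repeatable between runs of data-prep.py.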


#
# create TensorFlow records from xml
# create label map
#
os.system('python Tensorflow/scripts/generate_tfrecord.py -x images/training/ -l label_map.pbtxt -o training.tfrecord')
os.system('python Tensorflow/scripts/generate_tfrecord.py -x images/validation/ -l label_map.pbtxt -o validation.tfrecord')

#count=1
#with open('label_map.pbtxt', "w") as file_out:
#    for name in classes.keys():
#        print ('item {', file=file_out)
#        print (' id: '+str(count), file=file_out)
#        print (' name: \''+name+'\'', file=file_out)
#        print ('}', file=file_out)
#        count=count+1

# Create CSV data files and TFRecord files
os.system('python3 create_csv.py')
os.system('python3 create_tfrecord.py --csv_input=images/train_labels.csv --labelmap=labelmap.txt --image_dir=images/train --output_path=train.tfrecord')
os.system('python3 create_tfrecord.py --csv_input=images/validation_labels.csv --labelmap=labelmap.txt --image_dir=images/validation --output_path=val.tfrecord')
