- initially push for face detection model in
authorEric Zavesky <ezavesky@research.att.com>
Sun, 15 Oct 2017 10:17:02 +0000 (05:17 -0500)
committerEric Zavesky <ezavesky@research.att.com>
Sun, 15 Oct 2017 10:17:02 +0000 (05:17 -0500)
privacy filter

.gitignore [new file with mode: 0644]
README.md [new file with mode: 0644]
bin/run_face-privacy-filter_reference.py [new file with mode: 0755]
bin/run_local.sh [new file with mode: 0755]
face_privacy_filter/__init__.py [new file with mode: 0644]
face_privacy_filter/data/haarcascade_frontalface_alt.xml.gz [new file with mode: 0644]
face_privacy_filter/filter_image.py [new file with mode: 0644]
face_privacy_filter/transform_detect.py [new file with mode: 0644]
setup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..1857fad
--- /dev/null
@@ -0,0 +1,107 @@
+# Various
+sandbox*.py
+
+# Data for TensorFlow
+data/
+inception/
+vgg16/
+checkpoints/
+checkpoints*
+logs/
+summary/
+
+# PyCharm
+.idea/
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+.DS_*
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..6e32cb8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,63 @@
+# face-privacy-filter
+A model for face detection and suppression.
+
+## Image Analysis for Face-based Privacy Filtering
+This source code creates and pushes a model into Cognita that processes
+incoming images and outputs detected faces as well as the original image
+input (if configured that way).  The model uses a [python interface](https://pypi.python.org/pypi/opencv-python)
+to the [OpenCV library](https://opencv.org/) to detect faces and perform
+subsequent image processing.  This module does not support training
+at this time and instead uses a pre-trained face cascade, which is
+included (from OpenCV) in this module.
+
+### Usage
+This package contains runnable scripts for command-line evaluation,
+packaging of a model (both dump and posting), and simple web-test
+uses.  All functionality is encapsulated in the `filter_image.py`
+script and has the following arguments.
+
+```
+usage: run_face-privacy-filter_reference.py [-h] [-p PREDICT_PATH] [-i INPUT]
+                                            [-s] [-a PUSH_ADDRESS]
+                                            [-d DUMP_MODEL]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -p PREDICT_PATH, --predict_path PREDICT_PATH
+                        save detections from model (model must be provided via
+                        'dump_model')
+  -i INPUT, --input INPUT
+                        absolute path to input image (only during prediction /
+                        dump)
+  -s, --suppress_image  do not create an extra row for a returned image
+  -a PUSH_ADDRESS, --push_address PUSH_ADDRESS
+                        server address to push the model (e.g.
+                        http://localhost:8887/v2/models)
+  -d DUMP_MODEL, --dump_model DUMP_MODEL
+                        dump model to a pickle directory for local running
+```
+
+
+### Examples
+Example for dumping the `detect` model to disk.
+```
+./bin/run_local.sh -d model
+```
+
+
+## Face-based Use Cases
+This project includes a number of face-based use cases including raw
+detection, blurring, and other image-based modifications based on
+detected image regions.
+
+* **Face Detection Use-case** - This source code creates and pushes a model that processes
+incoming images and outputs detected faces.
+
+# Example Interface
+An instance should first be built and downloaded and then
+launched locally.  Afterwards, the sample application found in 
+[web_demo](web_demo) uses a `localhost` service to classify
+and visualize the results of image classification.
+
+* [DiCaprio celebrity face sample](web_demo/images/face_DiCaprio.jpg) [wikimedia source](https://en.wikipedia.org/wiki/Celebrity#/media/File:Leonardo_DiCaprio_visited_Goddard_Saturday_to_discuss_Earth_science_with_Piers_Sellers_(26105091624)_cropped.jpg)
+* [Schwarzenegger celebrity face sample](web_demo/images/face_Schwarzenegger.jpg) [wikimedia source](https://upload.wikimedia.org/wikipedia/commons/thumb/0/0f/A._Schwarzenegger.jpg/220px-A._Schwarzenegger.jpg)
\ No newline at end of file
diff --git a/bin/run_face-privacy-filter_reference.py b/bin/run_face-privacy-filter_reference.py
new file mode 100755 (executable)
index 0000000..4e48510
--- /dev/null
@@ -0,0 +1,10 @@
+#! python
+# -*- coding: utf-8 -*-
+"""
+Command line code for face privacy filter
+"""
+
+from face_privacy_filter.filter_image import main
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/run_local.sh b/bin/run_local.sh
new file mode 100755 (executable)
index 0000000..01387d7
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+#------------------------------------------------------------------------
+#  run_local.sh - locally starts a face privacy filter instance
+#------------------------------------------------------------------------
+
+# infer the project location
+MODEL_DIR=$(dirname $( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) )
+echo "Local run directory '$MODEL_DIR'..."
+
+# inject into python path and run with existing args (for unix-like environments)
+PYTHONPATH="$MODEL_DIR:$PYTHONPATH" python $MODEL_DIR/bin/run_face-privacy-filter_reference.py $*
diff --git a/face_privacy_filter/__init__.py b/face_privacy_filter/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/face_privacy_filter/data/haarcascade_frontalface_alt.xml.gz b/face_privacy_filter/data/haarcascade_frontalface_alt.xml.gz
new file mode 100644 (file)
index 0000000..b2f8e98
Binary files /dev/null and b/face_privacy_filter/data/haarcascade_frontalface_alt.xml.gz differ
diff --git a/face_privacy_filter/filter_image.py b/face_privacy_filter/filter_image.py
new file mode 100644 (file)
index 0000000..e87cb24
--- /dev/null
@@ -0,0 +1,78 @@
+#! python
+# -*- coding: utf-8 -*-
+"""
+Wrapper for face privacy filtering task
+"""
+
+import os.path
+import sys
+
+import numpy as np
+import pandas as pd
+
+from face_privacy_filter.transform_detect import FaceDetectTransform
+from face_privacy_filter._version import MODEL_NAME
+
+
+def model_create_pipeline(transformer, pipeline_type="detect"):
+    #from sklearn.pipeline import Pipeline
+    dependent_modules = [pd, np, 'opencv-python']  # define as dependent libraries
+
+    # for now, do nothing specific to transformer...
+
+    return transformer, dependent_modules
+
+
+def main(config={}):
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', '--predict_path', type=str, default='', help="save detections from model (model must be provided via 'dump_model')")
+    parser.add_argument('-i', '--input', type=str, default='',help='absolute path to input image (only during prediction / dump)')
+    parser.add_argument('-s', '--suppress_image', dest='suppress_image', action='store_true', default=False, help='do not create an extra row for a returned image')
+    parser.add_argument('-a', '--push_address', help='server address to push the model (e.g. http://localhost:8887/v2/models)', default='')
+    parser.add_argument('-d', '--dump_model', help='dump model to a pickle directory for local running', default='')
+    config.update(vars(parser.parse_args()))     #pargs, unparsed = parser.parse_known_args()
+
+    if not config['predict_path']:
+        print("Attempting to create new model for dump or push...")
+
+        # refactor the raw samples from upstream image classifier
+        transform = FaceDetectTransform(include_image=not config['suppress_image'])
+        inputDf = transform.generate_in_df()
+        pipeline, EXTRA_DEPS = model_create_pipeline(transform, "detect")
+
+        # formulate the pipeline to be used
+        if 'push_address' in config and config['push_address']:
+            from cognita_client.push import push_sklearn_model # push_skkeras_hybrid_model (keras?)
+            print("Pushing new model to '{:}'...".format(config['push_address']))
+            push_sklearn_model(pipeline, inputDf, api=config['push_address'], name=MODEL_NAME, extra_deps=EXTRA_DEPS)
+
+        if 'dump_model' in config and config['dump_model']:
+            from cognita_client.wrap.dump import dump_sklearn_model # dump_skkeras_hybrid_model (keras?)
+            print("Dumping new model to '{:}'...".format(config['dump_model']))
+            dump_sklearn_model(pipeline, inputDf, config['dump_model'], name=MODEL_NAME, extra_deps=EXTRA_DEPS)
+
+    else:
+        if not config['dump_model'] or not os.path.exists(config['dump_model']):
+            print("Attempting to predict from a dumped model, but model not found.".format(config['dump_model']))
+            sys.exit(-1)
+        if not os.path.exists(config['input']):
+            print("Prediction requested but target input '{:}' was not found, please check input arguments.".format(config['input']))
+            sys.exit(-1)
+
+        print("Attempting predict/transform on input sample...")
+        from cognita_client.wrap.load import load_model
+        model = load_model(config['dump_model'])
+        inputDf = FaceDetectTransform.generate_in_df(config['input'])
+        dfPred = model.transform.from_native(inputDf).as_native()
+        dfPred = FaceDetectTransform.suppress_image(dfPred)
+
+        if config['predict_path']:
+            print("Writing prediction to file '{:}'...".format(config['predict_path']))
+            dfPred.to_csv(config['predict_path'], sep=",", index=False)
+
+        if dfPred is not None:
+            print("Predictions:\n{:}".format(dfPred))
+
+if __name__ == '__main__':
+    main()
diff --git a/face_privacy_filter/transform_detect.py b/face_privacy_filter/transform_detect.py
new file mode 100644 (file)
index 0000000..393d02e
--- /dev/null
@@ -0,0 +1,140 @@
+#! python
+# -*- coding: utf-8 -*-
+"""
+Wrapper for face detection task; wrapped in classifier for pipeline terminus
+"""
+import cv2
+import os
+import pandas as pd
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+
+class FaceDetectTransform(BaseEstimator, ClassifierMixin):
+    '''
+    A sklearn transformer mixin that detects faces and optionally outputs the original detected image
+    '''
+    CASCADE_DEFAULT_FILE = "data/haarcascade_frontalface_alt.xml.gz"
+    COL_FACE_X = 'x'
+    COL_FACE_Y = 'y'
+    COL_FACE_W = 'w'
+    COL_FACE_H = 'h'
+    COL_FACE_IDX = 'region'
+    COL_IMAGE_IDX = 'image'
+    COL_IMAGE_MIME = 'mime_type'
+    COL_IMAGE_DATA = 'binary_stream'
+
+    def __init__(self, cascade_path=None, include_image=True):
+        self.include_image = include_image    # should output transform include image?
+        self.cascade_path = cascade_path    # abs path outside of module
+        self.cascade_obj = None # late-load this component
+
+    def get_params(self, deep=False):
+        return {'include_image': self.include_image}
+
+    @staticmethod
+    def generate_in_df(path_image=""):
+        # munge stream and mimetype into input sample
+        binStream = b""
+        if path_image and os.path.exists(path_image):
+            binStream = open(path_image, 'rb').read()
+        return pd.DataFrame([['image/jpeg', binStream]], columns=[FaceDetectTransform.COL_IMAGE_MIME, FaceDetectTransform.COL_IMAGE_DATA])
+
+    def generate_out_dict(self, idx=-1, x=0, y=0, w=0, h=0, image=0):
+        return {FaceDetectTransform.COL_FACE_IDX: idx, FaceDetectTransform.COL_FACE_X: x,
+                FaceDetectTransform.COL_FACE_Y: y, FaceDetectTransform.COL_FACE_W: w, FaceDetectTransform.COL_FACE_H: h,
+                FaceDetectTransform.COL_IMAGE_IDX: image,
+                FaceDetectTransform.COL_IMAGE_MIME: '', FaceDetectTransform.COL_IMAGE_DATA: ''}
+
+    @staticmethod
+    def suppress_image(df):
+        keep_col = [FaceDetectTransform.COL_FACE_X, FaceDetectTransform.COL_FACE_Y,
+                    FaceDetectTransform.COL_FACE_W, FaceDetectTransform.COL_FACE_H,
+                    FaceDetectTransform.COL_FACE_IDX, FaceDetectTransform.COL_IMAGE_IDX]
+        blank_cols = [col for col in df.columns if col not in keep_col]
+        # set columns that aren't in our known column list to empty strings; search where face index==-1 (no face)
+        df.loc[df[FaceDetectTransform.COL_FACE_IDX]==-1,blank_cols] = ""
+        return df
+
+    @property
+    def output_names_(self):
+        return [FaceDetectTransform.COL_FACE_IDX, FaceDetectTransform.COL_FACE_X, FaceDetectTransform.COL_FACE_Y,
+                 FaceDetectTransform.COL_FACE_W, FaceDetectTransform.COL_FACE_H,
+                 FaceDetectTransform.COL_IMAGE_IDX, FaceDetectTransform.COL_IMAGE_MIME, FaceDetectTransform.COL_IMAGE_DATA]
+
+    @property
+    def output_types_(self):
+        list_name = self.output_names_
+        list_type = self.classes_
+        return [{list_name[i]:list_type[i]} for i in range(len(list_name))]
+
+    @property
+    def n_outputs_(self):
+        return 8
+
+    @property
+    def classes_(self):
+        return [int, int, int, int, int, int, str, str]
+
+    def score(self, X, y=None):
+        return 0
+
+    def fit(self, X, y=None):
+        return self
+
+    def predict(self, X, y=None):
+        """
+        Assumes a numpy array of [[mime_type, binary_string] ... ]
+           where mime_type is an image-specifying mime type and binary_string is the raw image bytes       
+        """
+        # if no model exists yet, create it
+        if self.cascade_obj is None:
+            if self.cascade_path is not None:
+                self.cascade_obj = cv2.CascadeClassifier(self.cascade_path)
+            else:   # none provided, load what came with the package
+                pathRoot = os.path.dirname(os.path.abspath(__file__))
+                pathFile = os.path.join(pathRoot, FaceDetectTransform.CASCADE_DEFAULT_FILE)
+                self.cascade_obj = cv2.CascadeClassifier(pathFile)
+
+        dfReturn = None
+        for image_idx in range(len(X)):
+            # image_set = X[:, image_idx]
+            file_bytes = np.asarray(bytearray(X[FaceDetectTransform.COL_IMAGE_DATA][image_idx]), dtype=np.uint8)
+            img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
+            # img = cv2.imread(image_set[1])
+            faces = self.detect_faces(img)
+
+            df = pd.DataFrame()  # start with empty DF for this image
+            if self.include_image:  # create and append the image if that's requested
+                dict_image = self.generate_out_dict(w=img.shape[0], h=img.shape[1], image=image_idx)
+                dict_image[FaceDetectTransform.COL_IMAGE_MIME] = X[FaceDetectTransform.COL_IMAGE_MIME][image_idx]
+                dict_image[FaceDetectTransform.COL_IMAGE_DATA] = X[FaceDetectTransform.COL_IMAGE_DATA][image_idx]
+                df = pd.DataFrame([dict_image])
+            for idxF in range(len(faces)):  # walk through detected faces
+                face_rect = faces[idxF]
+                df = df.append(pd.DataFrame([self.generate_out_dict(idxF, face_rect[0], face_rect[1],
+                                                                    face_rect[2], face_rect[3], image=image_idx)]),
+                               ignore_index=True)
+            if dfReturn is None:  # create an NP container for all image samples + features
+                dfReturn = df.reindex_axis(self.output_names_, axis=1)
+            else:
+                dfReturn = dfReturn.append(df, ignore_index=True)
+            print("IMAGE {:} found {:} total rows".format(image_idx, len(df)))
+
+        return dfReturn
+
+    def detect_faces(self, img):
+        if self.cascade_obj is None: return []
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+        faces = self.cascade_obj.detectMultiScale(
+            gray,
+            scaleFactor=1.1,
+            minNeighbors=5,
+            minSize=(30, 30),
+            flags=cv2.CASCADE_SCALE_IMAGE
+        )
+
+        # Draw a rectangle around the faces
+        #for (x, y, w, h) in faces:
+        #    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+        return faces
index c20b87e..340a0ab 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ setup(
     install_requires=['cognita_client',
                       'numpy',
                       'sklearn',
-                      'cv2'
+                      'opencv-python',
                       globals_dict['MODEL_NAME']],
     tests_require=['pytest',
                    'pexpect'],