- serialize face cascade to avoid filesystem dependency; update javascript param name
author Eric Zavesky <ezavesky@research.att.com>
Fri, 17 Nov 2017 17:23:27 +0000 (11:23 -0600)
committer Eric Zavesky <ezavesky@research.att.com>
Fri, 17 Nov 2017 17:23:27 +0000 (11:23 -0600)

face_privacy_filter/filter_image.py
face_privacy_filter/transform_detect.py
face_privacy_filter/transform_region.py
testing/app.py
web_demo/face-privacy.js

index f4aaa43..ce2a55b 100644 (file)
@@ -32,9 +32,10 @@ def model_create_pipeline(transformer):
     output_type = [(k, List[type_list[k]]) for k in type_list]
     type_out = create_namedtuple(type_name, output_type)
 
-    def predict_class(value: type_in) -> type_out:
+    def predict_class(val_wrapped: type_in) -> type_out:
         '''Returns an array of float predictions'''
-        df = pd.DataFrame(np.column_stack(value), columns=value._fields)
+        df = pd.DataFrame(list(zip(*val_wrapped)), columns=val_wrapped._fields)
+        # df = pd.DataFrame(np.column_stack(val_wrapped), columns=val_wrapped._fields)  # numpy doesn't like binary
         tags_df = transformer.predict(df)
         tags_list = type_out(*(col for col in tags_df.values.T))  # flatten to tag set
         return tags_list
@@ -106,7 +107,7 @@ def main(config={}):
         type_in = model.transform._input_type
         transform_in = type_in(*tuple(col for col in inputDf.values.T))
         transform_out = model.transform.from_wrapped(transform_in).as_wrapped()
-        dfPred = pd.DataFrame(np.column_stack(transform_out), columns=transform_out._fields)
+        dfPred = pd.DataFrame(list(zip(*transform_out)), columns=transform_out._fields)
 
         if not config['csv_input']:
             dfPred = FaceDetectTransform.suppress_image(dfPred)
index 1bc9528..5dc6799 100644 (file)
@@ -10,6 +10,13 @@ import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 import base64
 
+import gzip
+import sys
+if sys.version_info[0] < 3:
+    from cStringIO import StringIO as BytesIO
+else:
+    from io import BytesIO as BytesIO
+
 
 class FaceDetectTransform(BaseEstimator, ClassifierMixin):
     '''
@@ -26,13 +33,37 @@ class FaceDetectTransform(BaseEstimator, ClassifierMixin):
     COL_IMAGE_DATA = 'image_binary'
     VAL_REGION_IMAGE_ID = -1
 
-    def __init__(self, cascade_path=None, include_image=True):
+    def __init__(self, cascade_path=None, cascade_stream=None, include_image=True):
         self.include_image = include_image    # should output transform include image?
-        self.cascade_path = cascade_path    # abs path outside of module
         self.cascade_obj = None  # late-load this component
+        self.cascade_stream = cascade_stream    # compressed binary final for cascade data
+        if self.cascade_stream is None:
+            if cascade_path is None:   # default/included data?
+                pathRoot = os.path.dirname(os.path.abspath(__file__))
+                cascade_path = os.path.join(pathRoot, FaceDetectTransform.CASCADE_DEFAULT_FILE)
+            raw_stream = b""
+            with open(cascade_path, 'rb') as f:
+                raw_stream = f.read()
+                self.cascade_stream = {'name': os.path.basename(cascade_path),
+                                       'data': FaceDetectTransform.string_compress(raw_stream)}
+
+    @staticmethod
+    def string_compress(string_data):
+        out_data = BytesIO()
+        with gzip.GzipFile(fileobj=out_data, mode="wb") as f:
+            f.write(string_data)
+        return out_data.getvalue()
+
+    @staticmethod
+    def string_decompress(compressed_data):
+        in_data = BytesIO(compressed_data)
+        ret_str = None
+        with gzip.GzipFile(fileobj=in_data, mode="rb") as f:
+            ret_str = f.read()
+        return ret_str
 
     def get_params(self, deep=False):
-        return {'include_image': self.include_image}
+        return {'include_image': self.include_image, 'cascade_stream': self.cascade_stream}
 
     @staticmethod
     def generate_in_df(path_image="", bin_stream=b""):
@@ -47,12 +78,16 @@ class FaceDetectTransform(BaseEstimator, ClassifierMixin):
         with open(path_image, 'wb') as f:
             f.write(row[FaceDetectTransform.COL_IMAGE_DATA][0])
 
+    @staticmethod
+    def output_names_():
+        return [FaceDetectTransform.COL_IMAGE_IDX, FaceDetectTransform.COL_REGION_IDX,
+                FaceDetectTransform.COL_FACE_X, FaceDetectTransform.COL_FACE_Y,
+                FaceDetectTransform.COL_FACE_W, FaceDetectTransform.COL_FACE_H,
+                FaceDetectTransform.COL_IMAGE_MIME, FaceDetectTransform.COL_IMAGE_DATA]
+
     @staticmethod
     def generate_out_dict(idx=VAL_REGION_IMAGE_ID, x=0, y=0, w=0, h=0, image=0, bin_stream=b"", media=""):
-        return {FaceDetectTransform.COL_IMAGE_IDX: image, FaceDetectTransform.COL_REGION_IDX: idx,
-                FaceDetectTransform.COL_FACE_X: x, FaceDetectTransform.COL_FACE_Y: y,
-                FaceDetectTransform.COL_FACE_W: w, FaceDetectTransform.COL_FACE_H: h,
-                FaceDetectTransform.COL_IMAGE_MIME: media, FaceDetectTransform.COL_IMAGE_DATA: bin_stream}
+        return dict(zip(FaceDetectTransform.output_names_(), [image, idx, x, y, w, h, media, bin_stream]))
 
     @staticmethod
     def suppress_image(df):
@@ -78,21 +113,28 @@ class FaceDetectTransform(BaseEstimator, ClassifierMixin):
     def fit(self, X, y=None):
         return self
 
+    def load_cascade(self):
+        # if no model exists yet, create it; return False for deserialize required
+        if self.cascade_obj is None:
+            if self.cascade_stream is not None:
+                import tempfile
+                with tempfile.TemporaryDirectory() as tdir:
+                    cascade_data = FaceDetectTransform.string_decompress(self.cascade_stream['data'])
+                    cascade_path = os.path.join(tdir, self.cascade_stream['name'])
+                    with open(cascade_path, 'wb') as f:
+                        f.write(cascade_data)
+                    self.cascade_obj = cv2.CascadeClassifier(cascade_path)
+            return False
+        return True
+
     def predict(self, X, y=None):
         """
         Assumes a numpy array of [[mime_type, binary_string] ... ]
            where mime_type is an image-specifying mime type and binary_string is the raw image bytes
         """
-        # if no model exists yet, create it
-        if self.cascade_obj is None:
-            if self.cascade_path is not None:
-                self.cascade_obj = cv2.CascadeClassifier(self.cascade_path)
-            else:   # none provided, load what came with the package
-                pathRoot = os.path.dirname(os.path.abspath(__file__))
-                pathFile = os.path.join(pathRoot, FaceDetectTransform.CASCADE_DEFAULT_FILE)
-                self.cascade_obj = cv2.CascadeClassifier(pathFile)
-
+        self.load_cascade()  # JIT load model
         dfReturn = None
+        listData = []
         for image_idx in range(len(X)):
             image_byte = X[FaceDetectTransform.COL_IMAGE_DATA][image_idx]
             if type(image_byte) == str:
@@ -104,24 +146,17 @@ class FaceDetectTransform(BaseEstimator, ClassifierMixin):
             # img = cv2.imread(image_set[1])
             faces = self.detect_faces(img)
 
-            df = pd.DataFrame()  # start with empty DF for this image
             if self.include_image:  # create and append the image if that's requested
-                dict_image = FaceDetectTransform.generate_out_dict(w=img.shape[1], h=img.shape[0], image=image_idx)
-                dict_image[FaceDetectTransform.COL_IMAGE_MIME] = X[FaceDetectTransform.COL_IMAGE_MIME][image_idx]
-                dict_image[FaceDetectTransform.COL_IMAGE_DATA] = X[FaceDetectTransform.COL_IMAGE_DATA][image_idx]
-                df = pd.DataFrame([dict_image])
+                listData.append(FaceDetectTransform.generate_out_dict(w=img.shape[1], h=img.shape[0], image=image_idx,
+                                                                      media=X[FaceDetectTransform.COL_IMAGE_MIME][image_idx],
+                                                                      bin_stream=X[FaceDetectTransform.COL_IMAGE_DATA][image_idx]))
             for idxF in range(len(faces)):  # walk through detected faces
                 face_rect = faces[idxF]
-                df = df.append(pd.DataFrame([FaceDetectTransform.generate_out_dict(idxF, face_rect[0], face_rect[1],
-                                                                                   face_rect[2], face_rect[3], image=image_idx)]),
-                               ignore_index=True)
-            if dfReturn is None:  # create an NP container for all image samples + features
-                dfReturn = df   # df.reindex_axis(self.output_names_, axis=1)
-            else:
-                dfReturn = dfReturn.append(df, ignore_index=True)
+                listData.append(FaceDetectTransform.generate_out_dict(idxF, x=face_rect[0], y=face_rect[1],
+                                                                      w=face_rect[2], h=face_rect[3], image=image_idx))
             # print("IMAGE {:} found {:} total rows".format(image_idx, len(df)))
 
-        return dfReturn
+        return pd.DataFrame(listData, columns=FaceDetectTransform.output_names_())  # start with empty DF for this image
 
     def detect_faces(self, img):
         if self.cascade_obj is None:
index 2cd3689..e50726f 100644 (file)
@@ -29,8 +29,12 @@ class RegionTransform(BaseEstimator, ClassifierMixin):
         return {'transform_mode': self.transform_mode}
 
     @staticmethod
-    def generate_out_df(media_type="", bin_stream=b""):
-        return pd.DataFrame([[media_type, bin_stream]], columns=[FaceDetectTransform.COL_IMAGE_MIME, FaceDetectTransform.COL_IMAGE_DATA])
+    def output_names_():
+        return [FaceDetectTransform.COL_IMAGE_MIME, FaceDetectTransform.COL_IMAGE_DATA]
+
+    @staticmethod
+    def generate_out_dict(bin_stream=b"", media=""):
+        return {FaceDetectTransform.COL_IMAGE_MIME: media, FaceDetectTransform.COL_IMAGE_DATA: bin_stream}
 
     @staticmethod
     def generate_in_df(idx=FaceDetectTransform.VAL_REGION_IMAGE_ID, x=0, y=0, w=0, h=0, image=0, bin_stream=b"", media=""):
@@ -68,10 +72,9 @@ class RegionTransform(BaseEstimator, ClassifierMixin):
         #   collect all remaining regions, operate with each on input image
         #   generate output image, send to output
 
-        dfReturn = None
         image_region_list = RegionTransform.transform_raw_sample(X)
+        listData = []
         for image_data in image_region_list:
-            # print(image_data)
             img = image_data['data']
             for r in image_data['regions']:  # loop through regions
                 x_max = min(r[0] + r[2], img.shape[1])
@@ -84,13 +87,9 @@ class RegionTransform(BaseEstimator, ClassifierMixin):
             img_binary = cv2.imencode(".jpg", img)[1].tostring()
             img_mime = 'image/jpeg'  # image_data['mime']
 
-            df = RegionTransform.generate_out_df(media_type=img_mime, bin_stream=img_binary)
-            if dfReturn is None:  # create an NP container for all images
-                dfReturn = df.reindex_axis(self.output_names_, axis=1)
-            else:
-                dfReturn = dfReturn.append(df, ignore_index=True)
-            print("IMAGE {:} found {:} total rows".format(image_data['image'], len(df)))
-        return dfReturn
+            listData.append(RegionTransform.generate_out_dict(media=img_mime, bin_stream=img_binary))
+            print("IMAGE {:} found {:} total rows".format(image_data['image'], len(image_data['regions'])))
+        return pd.DataFrame(listData, columns=RegionTransform.output_names_())
 
     @staticmethod
     def transform_raw_sample(raw_sample):
@@ -111,7 +110,9 @@ class RegionTransform(BaseEstimator, ClassifierMixin):
             image_byte = image_row[FaceDetectTransform.COL_IMAGE_DATA][0]
             if type(image_byte) == str:
                 image_byte = image_byte.encode()
-            image_byte = bytearray(base64.b64decode(image_byte))
+                image_byte = bytearray(base64.b64decode(image_byte))
+            else:
+                image_byte = bytearray(image_byte)
             file_bytes = np.asarray(image_byte, dtype=np.uint8)
             local_image['data'] = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
             local_image['image'] = nameG
index 7cffcaa..8ec71ef 100755 (executable)
@@ -38,12 +38,13 @@ def transform(mime_type, image_binary):
         detect_in = type_in(*tuple(col for col in X.values.T))
         pred_out = app.model_detect.transform.from_wrapped(detect_in)
     if app.model_proc is not None and pred_out is not None:  # then transform to output type
-        pred_out = app.model_proc.transform.from_msg(pred_out.as_msg())
+        pred_out = app.model_proc.transform.from_pb_msg(pred_out.as_pb_msg()).as_wrapped()
     time_stop = time.clock()-time_start
 
     pred = None
     if pred_out is not None:
-        pred = pd.DataFrame(np.column_stack(pred_out), columns=pred_out._fields)
+        pred = pd.DataFrame(list(zip(*pred_out)), columns=pred_out._fields)
+        pred['image_binary'] = pred['image_binary'].apply(lambda x: base64.b64encode(x).decode())
     retStr = json.dumps(pred.to_dict(orient='records'), indent=4)
 
     # formulate response
index a4f7d17..4e76cfe 100644 (file)
@@ -230,7 +230,7 @@ function doPostImage(srcCanvas, dstImg, dataPlaceholder) {
 \r
        $(document.body).data('hdparams').imageIsWaiting = true;\r
     serviceURL = hd.classificationServer;\r
-    fd.append("base64_data", blob);\r
+    fd.append("image_binary", blob);\r
     fd.append("mime_type", "image/jpeg");\r
     var $dstImg = $(dstImg);\r
     if ($dstImg.attr('src')=='') {\r
@@ -246,7 +246,7 @@ function doPostImage(srcCanvas, dstImg, dataPlaceholder) {
                    var responseJson = $.parseJSON(request.responseText);\r
                    var respImage = responseJson[0];\r
                    // https://stackoverflow.com/questions/21227078/convert-base64-to-image-in-javascript-jquery\r
-            $dstImg.attr('src', "data:"+respImage['mime_type']+";base64,"+respImage['base64_data']).removeClass('workingImage');\r
+            $dstImg.attr('src', "data:"+respImage['mime_type']+";base64,"+respImage['image_binary']).removeClass('workingImage');\r
                        //genClassTable($.parseJSON(request.responseText), dstDiv);\r
                        hd.imageIsWaiting = false;\r
                }\r