Source code for svg_model.__init__

# coding: utf-8
import cStringIO as StringIO
import re
import types
import warnings

import lxml
import pandas as pd
import pint  # Unit conversion from inches to mm
from .data_frame import get_bounding_boxes


# Namespace URI bound to the ``svg`` prefix in the XPath namespace maps below.
XHTML_NAMESPACE = 'http://www.w3.org/2000/svg'
# Prefix -> URI mapping for XPath queries that only involve SVG elements.
NSMAP = dict(svg=XHTML_NAMESPACE)
# Same mapping, extended (as a separate dictionary) with the ``inkscape``
# prefix.
INKSCAPE_NSMAP = dict(NSMAP,
                      inkscape='http://www.inkscape.org/namespaces/inkscape')

# Convert Inkscape pixels-per-inch (PPI) to pixels-per-mm (PPmm).
#
# A module-level `pint` unit registry is used to express one inch in
# millimetres.
ureg = pint.UnitRegistry()

# Pixels-per-inch resolution assumed for Inkscape documents.
INKSCAPE_PPI = 90
# NOTE: the result is a `pint` quantity rather than a plain float; use
# ``INKSCAPE_PPmm.magnitude`` to get the bare number (this is what the default
# `scale` argument of `scale_points` does).
INKSCAPE_PPmm = INKSCAPE_PPI / (1 * ureg.inch).to('mm')

float_pattern = r'[+-]?\d+(\.\d+)?([eE][+-]?\d+)?'  # 2, 1.23, 23e39, 1.23e-6, etc.
# Matches one absolute SVG path command at a time:
#
#  - ``M``/``L`` followed by whitespace and an ``x,y`` pair (groups ``x``/``y``)
#  - ``H`` followed by whitespace and an x-coordinate (group ``hx``)
#  - ``V`` followed by whitespace and a y-coordinate (group ``vy``)
#  - ``Z`` (group ``command``; note that this group also captures any
#    whitespace trailing the ``Z``).
cre_path_command = re.compile(r'((?P<xy_command>[ML])\s+(?P<x>{0}),\s*(?P<y>{0})\s*|'
                              r'(?P<x_command>[H])\s+(?P<hx>{0})\s*|'
                              r'(?P<y_command>[V])\s+(?P<vy>{0})\s*|'
                              r'(?P<command>[Z]\s*))'
                              .format(float_pattern))


def shape_path_points(svg_path_d):
    '''
    Resolve the absolute position reached by each command in an SVG path.

    Only the absolute commands ``M``, ``L``, ``H``, ``V`` and ``Z`` (as matched
    by :data:`cre_path_command`) are recognized; any other text in
    :data:`svg_path_d` is skipped.

    Parameters
    ----------
    svg_path_d : str
        ``"d"`` attribute of SVG ``path`` element.

    Returns
    -------
    list
        List of coordinates of points found in SVG path.

        Each point is represented by a dictionary with keys ``x`` and ``y``.
        A coordinate that has not been set yet (e.g., ``y`` when the path
        starts with ``H``) is ``None``.

    Raises
    ------
    KeyError
        If a ``Z`` command occurs before any ``M``/``L`` command, i.e., there
        is no sub-path start position to return to.
    '''
    # TODO Add support for relative commands, e.g., `l, h, v`.

    # Some commands in a SVG path element `"d"` attribute require previous
    # state.
    #
    # For example, the `"H"` command is a horizontal move, so the previous
    # ``y`` position is required to resolve the new `(x, y)` position.
    #
    # Iterate through the commands in the `"d"` attribute in order and maintain
    # the current position (`x`, `y`) and the start of the current sub-path
    # (`x0`, `y0`) in the `state` dictionary.
    state = {'x': None, 'y': None}
    points = []

    for match in cre_path_command.finditer(svg_path_d):
        xy_command = match.group('xy_command')
        if xy_command:
            state['x'] = float(match.group('x'))
            state['y'] = float(match.group('y'))
            # `M` begins a new sub-path, so a subsequent `Z` must return to
            # *this* position rather than to the start of an earlier sub-path.
            if xy_command == 'M' or 'x0' not in state:
                state['x0'] = state['x']
                state['y0'] = state['y']
        elif match.group('x_command'):
            state['x'] = float(match.group('hx'))
        elif match.group('y_command'):
            state['y'] = float(match.group('vy'))
        elif (match.group('command') or '').strip() == 'Z':
            # The `command` group includes whitespace following the `Z`, so
            # strip before comparing; otherwise `"Z "` would be ignored.
            state['x'] = state['x0']
            state['y'] = state['y0']
        # Record a snapshot; `state` itself keeps being mutated.
        points.append({'x': state['x'], 'y': state['y']})
    return points
def svg_shapes_to_df(svg_source, xpath='//svg:path | //svg:polygon',
                     namespaces=INKSCAPE_NSMAP):
    '''
    Construct a data frame with one row per vertex for all shapes in
    :data:`svg_source`.

    Arguments
    ---------
    svg_source : str or file-like
        A file path, URI, or file-like object.
    xpath : str, optional
        XPath path expression to select shape nodes.

        By default, all ``svg:path`` and ``svg:polygon`` elements are selected.
    namespaces : dict, optional
        Key/value mapping of XML namespaces.

    Returns
    -------
    pandas.DataFrame
        Frame with one row per vertex for all shapes in :data:`svg_source`,
        with the following columns:
         - ``vertex_i``: The index of the vertex within the corresponding
           shape.
         - ``x``: The x-coordinate of the vertex.
         - ``y``: The y-coordinate of the vertex.
         - other: attributes of the SVG shape element (e.g., ``id``, ``fill``,
           etc.)

        Selected elements that are neither ``svg:path`` nor ``svg:polygon``
        are skipped with a warning.  If no shape is found, an empty frame
        (with the columns above) is returned.
    '''
    from lxml import etree

    e_root = etree.parse(svg_source)
    # The document is not modified below, so evaluate the query only once.
    shapes = e_root.xpath(xpath, namespaces=namespaces)

    # Get list of attributes that are set in any of the shapes (not including
    # the `svg:path` `"d"` attribute or the `svg:polygon` `"points"`
    # attribute).
    #
    # This, for example, collects attributes such as:
    #
    #  - `fill`, `stroke` (as part of `"style"` attribute)
    #  - `"transform"`: matrix, scale, etc.
    attribs_set = set()
    for shape_i in shapes:
        attribs_set.update(shape_i.attrib.keys())
    attribs_set.difference_update(('d', 'points'))

    attribs = sorted(attribs_set)

    # Always add 'id' attribute as first attribute.
    if 'id' in attribs:
        attribs.remove('id')
        attribs.insert(0, 'id')

    frames = []
    for shape_i in shapes:
        # Gather shape attributes from SVG element.
        base_fields = [shape_i.attrib.get(k, None) for k in attribs]

        if shape_i.tag == '{http://www.w3.org/2000/svg}path':
            # Decode `svg:path` vertices from [`"d"`][1] attribute.
            #
            # [1]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/d
            vertices = [[point_i.get(k) for k in 'xy']
                        for point_i in shape_path_points(shape_i.attrib['d'])]
        elif shape_i.tag == '{http://www.w3.org/2000/svg}polygon':
            # Decode `svg:polygon` vertices from [`"points"`][2] attribute.
            #
            # Split on *any* run of whitespace so repeated spaces or line
            # breaks between vertices do not produce empty tokens.
            #
            # [2]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/points
            vertices = [[float(v) for v in vertex.split(',')]
                        for vertex in shape_i.attrib['points'].split()]
        else:
            warnings.warn('Unsupported shape tag type: %s' % shape_i.tag)
            continue
        frames.extend(base_fields + [i] + vertex_i
                      for i, vertex_i in enumerate(vertices))

    if not frames:
        # There were no shapes found, so set `frames` list to `None` to allow
        # an empty data frame to be created.
        frames = None
    return pd.DataFrame(frames, columns=attribs + ['vertex_i', 'x', 'y'])
def svg_polygons_to_df(svg_source, xpath='//svg:polygon',
                       namespaces=INKSCAPE_NSMAP):
    '''
    Construct a data frame with one row per vertex for all shapes selected by
    :data:`xpath` (by default, all ``svg:polygon`` elements) in
    :data:`svg_source`.

    Arguments
    ---------
    svg_source : str or file-like
        A file path, URI, or file-like object.
    xpath : str, optional
        XPath path expression to select shape nodes.
    namespaces : dict, optional
        Key/value mapping of XML namespaces.

    Returns
    -------
    pandas.DataFrame
        Frame with one row per vertex for all selected shapes in
        :data:`svg_source`, with the following columns:
         - ``path_id``: The ``id`` attribute of the corresponding shape
           (missing values if none of the selected shapes has an ``id``).
         - ``vertex_i``: The index of the vertex within the corresponding
           shape.
         - ``x``: The x-coordinate of the vertex.
         - ``y``: The y-coordinate of the vertex.

    .. note:: Deprecated in :mod:`svg_model` 0.5.post10
        :func:`svg_polygons_to_df` will be removed in :mod:`svg_model` 1.0, it
        is replaced by :func:`svg_shapes_to_df` because the latter is more
        general and works with ``svg:path`` and ``svg:polygon`` elements.
    '''
    warnings.warn("The `svg_polygons_to_df` function is deprecated.  Use "
                  "`svg_shapes_to_df` instead.")
    result = svg_shapes_to_df(svg_source, xpath=xpath, namespaces=namespaces)
    # `svg_shapes_to_df` only emits an `id` column when at least one selected
    # shape has that attribute.  `reindex` selects the same columns as plain
    # indexing would, but creates a missing-valued `id` column instead of
    # raising `KeyError` (e.g., for a document without any polygon).
    columns = ['id', 'vertex_i', 'x', 'y']
    return result.reindex(columns=columns).rename(columns={'id': 'path_id'})
def compute_shape_centers(df_shapes, shape_i_column, inplace=False):
    '''
    Compute the center point of each polygon shape, and the offset of each
    vertex to the corresponding polygon center point.

    The center of a shape is the center of its bounding box, as reported by
    :func:`get_bounding_boxes`.

    Parameters
    ----------
    df_shapes : pandas.DataFrame
        Table of polygon shape vertices (one row per vertex).

        Must have at least the following columns:
         - ``vertex_i``: The index of the vertex within the corresponding
           shape.
         - ``x``: The x-coordinate of the vertex.
         - ``y``: The y-coordinate of the vertex.
    shape_i_column : str
        Table rows with the same value in the :data:`shape_i_column` column
        are grouped together as a shape.
    inplace : bool, optional
        If ``True``, all new columns are added directly to the input frame
        (which is also returned).

        Otherwise, the columns are added to a copy of the input frame.

    Returns
    -------
    pandas.DataFrame
        Input frame with the following additional columns:
         - ``x_center``/``y_center``: Absolute coordinates of shape center.
         - ``x_center_offset``/``y_center_offset``:
             * Coordinates of each vertex coordinate relative to shape center.

    Raises
    ------
    KeyError
        If :data:`shape_i_column` is not a single column name (string).
    '''
    # Accept any string type, like the layer-name check in `remove_layer`.
    if not isinstance(shape_i_column, types.StringTypes):
        raise KeyError('Shape index must be a single column.')

    if not inplace:
        df_shapes = df_shapes.copy()

    # Get coordinates of center of each path.
    #
    # The bounding boxes are looked up below by the values of the shape
    # column, i.e., the frame is expected to be indexed by shape.
    df_bounding_boxes = get_bounding_boxes(df_shapes, shape_i_column)
    path_centers = (df_bounding_boxes[['x', 'y']] + .5 *
                    df_bounding_boxes[['width', 'height']].values)
    df_shapes['x_center'] = path_centers.x[df_shapes[shape_i_column]].values
    df_shapes['y_center'] = path_centers.y[df_shapes[shape_i_column]].values

    # Calculate coordinate of each path vertex relative to center point of
    # path.
    #
    # Assign the offset columns directly (instead of joining a separate frame
    # on the index) so that `inplace=True` really updates the input frame and
    # a non-unique index cannot multiply rows.
    for dim_j in 'xy':
        df_shapes['%s_center_offset' % dim_j] = (
            df_shapes[dim_j] - df_shapes['%s_center' % dim_j])
    return df_shapes
def scale_points(df_points, scale=INKSCAPE_PPmm.magnitude, inplace=False):
    '''
    Shift points so that the minimum ``x`` and ``y`` values become zero, then
    divide both columns by :data:`scale`.

    Parameters
    ----------
    df_points : pandas.DataFrame
        Table of point positions with (at least) the columns:
         - ``x``: x-coordinate
         - ``y``: y-coordinate
    scale : float, optional
        Divisor applied to the shifted coordinates.

        Defaults to the magnitude of :data:`INKSCAPE_PPmm` (pixels-per-mm at
        the Inkscape resolution of 90 pixels-per-inch), i.e., pixel
        coordinates are converted to millimeters.
    inplace : bool, optional
        If ``True``, the input frame itself is modified and returned.

        Otherwise, a copy is modified and the input frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        Frame whose ``x`` and ``y`` columns are anchored at (0, 0) and divided
        by :data:`scale`.
    '''
    result = df_points if inplace else df_points.copy()

    for axis in ('x', 'y'):
        # Anchor the axis at zero, then convert to the target units.
        result[axis] = (result[axis] - result[axis].min()) / scale

    return result
def scale_to_fit_a_in_b(a_shape, b_shape):
    '''
    Compute the factor by which `a_shape` must be scaled to fit inside
    `b_shape` without changing its aspect ratio.

    Arguments
    ---------
    a_shape, b_shape : pandas.Series
        Shapes with numeric `width` and `height` entries.

    Returns
    -------
    float
        Scale factor to fit :data:`a_shape` into :data:`b_shape` while
        maintaining aspect ratio.
    '''
    a_extent = a_shape.max()
    b_extent = b_shape.max()

    # Express both shapes relative to their own largest entry so they can be
    # compared independently of absolute size.
    a_unit = a_shape / a_extent
    b_unit = b_shape / b_extent

    # Shrink the normalized `a` until neither dimension exceeds `b`.  The
    # height check deliberately sees the result of the width adjustment.
    for dim in ('width', 'height'):
        if a_unit[dim] > b_unit[dim]:
            a_unit = a_unit * (b_unit[dim] / a_unit[dim])

    return a_unit.max() * b_extent / a_extent
def fit_points_in_bounding_box(df_points, bounding_box, padding_fraction=0):
    '''
    Scale and translate the ``x``, ``y`` columns of :data:`df_points` so the
    points fill :data:`bounding_box` while maintaining aspect ratio.

    The scale and offset are obtained from
    :func:`fit_points_in_bounding_box_params`.

    Arguments
    ---------
    df_points : pandas.DataFrame
        A frame with at least the columns ``x`` and ``y``, containing one row
        per point.
    bounding_box: pandas.Series
        A `pandas.Series` containing numeric `width` and `height` values.
    padding_fraction : float
        Fraction of padding to add around points.

    Returns
    -------
    pandas.DataFrame
        Copy of the input frame with the ``x`` and ``y`` values scaled and
        offset to fill :data:`bounding_box` while maintaining aspect ratio.
    '''
    offset, padded_scale = fit_points_in_bounding_box_params(df_points,
                                                             bounding_box,
                                                             padding_fraction)
    xy = ['x', 'y']
    df_scaled_points = df_points.copy()
    # Scale first, then translate (`offset` is aligned on the column labels).
    df_scaled_points[xy] = df_scaled_points[xy] * padded_scale + offset
    return df_scaled_points
def fit_points_in_bounding_box_params(df_points, bounding_box,
                                      padding_fraction=0):
    '''
    Return offset and scale factor to scale ``x``, ``y`` columns of
    :data:`df_points` to fill :data:`bounding_box` while maintaining aspect
    ratio.

    The extent of the points is taken as the *maximum* ``x`` and ``y`` values,
    i.e., the points are treated as if their bounding box were anchored at
    (0, 0).

    Arguments
    ---------
    df_points : pandas.DataFrame
        A frame with at least the columns ``x`` and ``y``, containing one row
        per point.
    bounding_box: pandas.Series
        A `pandas.Series` containing numeric `width` and `height` values.

        Any other entries are ignored.
    padding_fraction : float
        Fraction of padding to add around points.

        Must be less than 0.5.

    Returns
    -------
    (offset, scale) : (pandas.Series, float)
        Offset translation and scale required to fit all points in
        :data:`df_points` to fill :data:`bounding_box` while maintaining aspect
        ratio.

        :data:`offset` contains ``x`` and ``y`` values for the offset.

    Raises
    ------
    AssertionError
        If :data:`padding_fraction` leaves no room for the points.
    '''
    dims = ['width', 'height']
    points_bbox = pd.Series([df_points.x.max(), df_points.y.max()],
                            index=dims)
    # Select `width`/`height` explicitly, in a fixed order.  The offset below
    # is relabelled *positionally* as `x`/`y`, so extra entries or a different
    # entry order in `bounding_box` must not leak into the arithmetic.
    target_bbox = bounding_box[dims]

    fill_scale = 1 - 2 * padding_fraction
    assert fill_scale > 0, 'padding_fraction must be less than 0.5'

    scale = scale_to_fit_a_in_b(points_bbox, target_bbox)

    padded_scale = scale * fill_scale
    # Split the space left over after scaling evenly on both sides, which
    # centers the points in the bounding box.
    offset = .5 * (target_bbox - points_bbox * padded_scale)
    offset.index = ['x', 'y']
    return offset, padded_scale
def remove_layer(svg_source, layer_name):
    '''
    Remove layer(s) from SVG document.

    A layer is any ``svg:g`` element whose ``inkscape:label`` attribute equals
    the layer name.  The source itself is not modified.

    Arguments
    ---------
    svg_source : str or file-like
        A file path, URI, or file-like object.
    layer_name : str or list
        Layer name or list of layer names to remove from SVG document.

    Returns
    -------
    StringIO.StringIO
        File-like object containing XML document with layer(s) removed.
    '''
    # `import lxml` alone does not make the `lxml.etree` submodule available,
    # so import it explicitly (as `svg_shapes_to_df` does).
    from lxml import etree

    # Parse input file.
    xml_root = etree.parse(svg_source)
    svg_root = xml_root.xpath('/svg:svg', namespaces=INKSCAPE_NSMAP)[0]

    if isinstance(layer_name, types.StringTypes):
        layer_name = [layer_name]

    for layer_name_i in layer_name:
        # Remove existing layer from source, in-memory XML (source file remains
        # unmodified).
        #
        # Pass the name as an XPath variable rather than formatting it into
        # the expression, so names containing quotes are matched literally.
        layer_groups = svg_root.xpath('//svg:g[@inkscape:label=$label]',
                                      namespaces=INKSCAPE_NSMAP,
                                      label=layer_name_i)
        for g in layer_groups:
            g.getparent().remove(g)

    # Write result to `StringIO`.
    output = StringIO.StringIO()
    xml_root.write(output)
    # Rewind so callers can read the document from the start.
    output.seek(0)
    return output
# ## Deprecated ##
def get_scaled_svg_frame(svg_filepath, **kwargs):
    '''
    Removed function; calling it always raises ``NotImplementedError``.

    .. note:: Deprecated in :mod:`svg_model` 0.5
        :func:`get_scaled_svg_frame` was removed in :mod:`svg_model` 0.5, it is
        replaced by :func:`svg_model.scale_points` and
        :func:`svg_model.compute_shape_centers`.
    '''
    message = ('get_scaled_svg_frame function is deprecated.  '
               'Use `svg_model.scale_points` and '
               '`svg_model.compute_shape_centers`')
    raise NotImplementedError(message)