Source code for emu.processes.wps_wordcounter

import json
import re

from collections import Counter

from pywps import Process
from pywps import ComplexInput, ComplexOutput, FORMATS
from pywps.app.Common import Metadata

import logging
LOGGER = logging.getLogger("PYWPS")


class WordCounter(Process):
    """Count the occurrences of every word in a text document.

    Notes
    -----

    Counts occurrences of all words in a document. The result is a JSON
    array of ``[count, word]`` pairs sorted in descending order (by count,
    ties broken by word).
    """

    # A "word" is any maximal run of ``\w`` characters. Compiled once at class
    # creation instead of on every request.
    _WORD_RE = re.compile(r'\w+')

    def __init__(self):
        """Declare the process inputs/outputs and register the handler."""
        inputs = [
            ComplexInput(
                'text', 'Text document',
                abstract='URL pointing to a text document, for example "Alice in Wonderland":'
                         ' http://www.gutenberg.org/cache/epub/19033/pg19033.txt',
                supported_formats=[FORMATS.TEXT]),
        ]
        outputs = [
            ComplexOutput(
                'output', 'Word counter result',
                as_reference=True,
                supported_formats=[FORMATS.JSON]),
        ]

        super(WordCounter, self).__init__(
            self._handler,
            identifier='wordcounter',
            title='Word Counter',
            abstract="Counts words in a given text.",
            version='1.0',
            metadata=[
                Metadata('User Guide', 'http://emu.readthedocs.io/en/latest/'),
            ],
            inputs=inputs,
            outputs=outputs,
            store_supported=True,
            status_supported=True)

    @classmethod
    def _iter_words(cls, stream):
        """Yield every word found in ``stream``, line by line.

        Parameters
        ----------
        stream : iterable
            Iterable of lines. Each line may be ``bytes`` (decoded here as
            UTF-8) or already-decoded text.

        Yields
        ------
        str
            Each match of ``\\w+`` in reading order.
        """
        for line in stream:
            # Only byte lines need decoding; calling ``.decode`` on a text
            # line raises AttributeError on Python 3.
            # NOTE(review): whether the input stream yields bytes or text
            # appears to depend on the PyWPS/Python version in use - confirm.
            if isinstance(line, bytes):
                line = line.decode('UTF-8')
            for word in cls._WORD_RE.findall(line):
                yield word

    def _handler(self, request, response):
        """Count the words of the ``text`` input and store them as JSON.

        Parameters
        ----------
        request
            WPS request; the first ``text`` input's ``stream`` is read.
        response
            WPS response; its ``output`` data is set to the JSON document.

        Returns
        -------
        The same ``response`` object, with status updated to 100.
        """
        response.update_status('PyWPS Process started.', 0)

        counts = Counter(self._iter_words(request.inputs['text'][0].stream))
        # (count, word) tuples so that sorting orders primarily by frequency.
        sorted_counts = sorted(
            ((count, word) for word, count in counts.items()),
            reverse=True)
        response.outputs['output'].data = json.dumps(sorted_counts)

        response.update_status('PyWPS Process completed.', 100)
        return response