Source code for emu.processes.wps_wordcounter
import json
import logging
import re
from collections import Counter
from pywps import FORMATS, ComplexInput, ComplexOutput, Process
from pywps.ext_autodoc import MetadataUrl
# Module-level logger obtained under the name "PYWPS"; it is not referenced
# by the code visible in this file.
LOGGER = logging.getLogger("PYWPS")
[docs]
class WordCounter(Process):
    """WPS process that counts the occurrences of every word in a document.

    The process declares one complex input, ``text`` (plain text), and one
    complex output, ``output`` (JSON). The output is a JSON array of
    ``[count, word]`` pairs sorted in descending order.
    """

    def __init__(self):
        """Declare the inputs, outputs and metadata of the process."""
        inputs = [
            ComplexInput(
                'text', 'Text document',
                abstract='URL pointing to a text document, for example "Alice in Wonderland":'
                         ' http://www.gutenberg.org/cache/epub/19033/pg19033.txt',
                supported_formats=[FORMATS.TEXT]),
        ]
        outputs = [
            ComplexOutput(
                'output', 'Word counter result',
                as_reference=True,
                supported_formats=[FORMATS.JSON]),
        ]

        super(WordCounter, self).__init__(
            self._handler,
            identifier='wordcounter',
            title='Word Counter',
            abstract="Counts words in a given text.",
            version='1.0',
            metadata=[
                MetadataUrl('User Guide',
                            'http://emu.readthedocs.io/en/latest/',
                            anonymous=True),
            ],
            inputs=inputs,
            outputs=outputs,
            store_supported=True,
            status_supported=True)

    @staticmethod
    def _iter_words(stream):
        """Yield each run of word characters found in the lines of *stream*.

        Lines may be ``bytes`` (decoded as UTF-8) or already-decoded text;
        text lines are used as they are.
        """
        word_re = re.compile(r'\w+')
        for line in stream:
            # A stream opened in text mode yields ``str``, which has no
            # ``decode`` method on Python 3, so only decode raw bytes.
            if isinstance(line, bytes):
                line = line.decode('UTF-8')
            for word in word_re.findall(line):
                yield word

    def _handler(self, request, response):
        """Count the words of the ``text`` input and store the result as JSON.

        Args:
            request: The WPS request; ``request.inputs['text'][0].stream`` is
                iterated line by line.
            response: The WPS response; its ``output`` output receives the
                JSON document and its status is updated at start and end.

        Returns:
            The ``response`` object that was passed in.
        """
        response.update_status('PyWPS Process started.', 0)

        counts = Counter(self._iter_words(request.inputs['text'][0].stream))
        # Pairs are (count, word) so the reverse sort orders by frequency
        # first and breaks ties by word, both descending.
        sorted_counts = sorted(
            ((count, word) for word, count in counts.items()),
            reverse=True)

        response.outputs['output'].data = json.dumps(sorted_counts)
        response.update_status('PyWPS Process completed.', 100)
        return response