-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsequence.py
More file actions
27 lines (21 loc) · 838 Bytes
/
sequence.py
File metadata and controls
27 lines (21 loc) · 838 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from pyspark import RDD
from pyspark.sql import SparkSession
from pyspark.sql import DataFrame
import json
class SequenceOfItems:
    """Python-side wrapper around a sequence object and its Spark session.

    The wrapped ``sequence`` is expected to expose ``items()``, ``next()``,
    ``getAsPickledStringRDD()`` and ``getAsDataFrame()``; any attribute not
    defined on this wrapper is forwarded to it.
    NOTE(review): the wrapped object is presumably a Py4J proxy for a
    JVM-side sequence -- confirm against the caller that builds it.
    """

    def __init__(self, sequence, sparksession):
        """Store the wrapped sequence and the Spark session/context.

        Args:
            sequence: The underlying sequence object to delegate to.
            sparksession: Session whose ``sparkContext`` is used to build
                RDDs and which is attached to returned DataFrames.
        """
        self._jsequence = sequence
        self._sparkcontext = sparksession.sparkContext
        self._sparksession = sparksession

    def json(self):
        """Return every item of the sequence as a decoded JSON value.

        Each element of ``items()`` is serialized with ``serializeAsJSON()``
        and parsed with ``json.loads``; the results are collected in a list.
        """
        return [
            json.loads(entry.serializeAsJSON())
            for entry in self._jsequence.items()
        ]

    def rdd(self):
        """Return a PySpark RDD whose elements are JSON-decoded values.

        The result of ``getAsPickledStringRDD()`` is wrapped in a Python
        ``RDD`` bound to this wrapper's Spark context, and every element is
        then passed through ``json.loads``.
        """
        string_rdd = RDD(
            self._jsequence.getAsPickledStringRDD(), self._sparkcontext
        )
        return string_rdd.map(json.loads)

    def df(self):
        """Return the sequence as a PySpark ``DataFrame``.

        Wraps the result of ``getAsDataFrame()`` together with the stored
        Spark session.
        """
        return DataFrame(self._jsequence.getAsDataFrame(), self._sparksession)

    def nextJSON(self):
        """Return the next item serialized as JSON.

        Unlike ``json()``, the value is returned exactly as produced by
        ``serializeAsJSON()``; it is not parsed with ``json.loads``.
        """
        return self._jsequence.next().serializeAsJSON()

    def __getattr__(self, item):
        """Forward unknown attribute lookups to the wrapped sequence.

        ``__getattr__`` only runs after normal lookup has failed. If the
        name being looked up is ``_jsequence`` itself, the instance has not
        been initialised (for example it was created through ``__new__`` by
        ``copy`` or ``pickle``); reading ``self._jsequence`` here would
        re-enter this method and recurse until ``RecursionError``.

        Raises:
            AttributeError: If the wrapper has no ``_jsequence`` yet, or the
                wrapped sequence does not have the requested attribute.
        """
        if item == "_jsequence":
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    type(self).__name__, item
                )
            )
        return getattr(self._jsequence, item)