Getting Started with Avro

前端 未结 4 407
予麋鹿
予麋鹿 2020-12-28 18:07

I want to get started with using Avro with MapReduce. Can someone suggest a good tutorial or example to get started with? I couldn't find much through an internet search.

相关标签:
4条回答
  • 2020-12-28 18:46

    I recently did a project that was heavily based on Avro data and not having used this data format before, I had to start from scratch. You are right in that it is rather hard to get much help from online sources when getting started with Avro. The material that I would recommend to you is:

    • By far, the most helpful source that I found was the Avro section (p103-p116) in Tom White's Hadoop: The Definitive Guide book as well as his Github page for the code he uses in the book.
    • For additional code examples I looked at Ron Bodkin's Github page avro-mr-sample.
    • In my case I used Python for reading and writing Avro files and for that I used this tutorial.
    • Even though it is obvious, I will add the link to the Avro Users mailing list. There is a ton of information to be found there and after I had read the above material and implemented a bunch of code, I found myself spending hours looking through the archives.

    Finally, my last suggestion to you is to use Avro 1.4.1 with Hadoop 0.20.2 and ONLY that combination. I had some major issues getting my code to run using Hadoop 0.21 and more recent Avro versions.

    0 讨论(0)
  • 2020-12-28 18:50

    javascript + jquery : https://codepen.io/GentjanLikaj/pen/dyPXLXX

    /**
     * Turns the raw textarea content into a clean list of column names.
     * Line breaks are removed before splitting, then every piece is trimmed
     * and empty pieces are dropped, so stray spaces or a trailing separator
     * do not produce blank or padded rows.
     *
     * @param {string} rawText   Text typed into the column-list textarea.
     * @param {string} separator Separator string handed to String#split.
     * @returns {string[]} Non-empty, trimmed column names in input order.
     */
    function parseColumnNames(rawText, separator) {
    	var flattened = String(rawText).replace(/(\r\n|\n|\r)/gm, "");
    	var names = [];
    	flattened.split(separator).forEach(function (piece) {
    		var name = piece.trim();
    		if (name !== "") {
    			names.push(name);
    		}
    	});
    	return names;
    }

    /**
     * Escapes the characters that are significant in HTML so that a column
     * name is always rendered as text and never parsed as markup.
     *
     * @param {string} text Arbitrary user-supplied text.
     * @returns {string} The text with &, <, >, " and ' replaced by entities.
     */
    function escapeColumnHtml(text) {
    	return String(text)
    		.replace(/&/g, "&amp;")
    		.replace(/</g, "&lt;")
    		.replace(/>/g, "&gt;")
    		.replace(/"/g, "&quot;")
    		.replace(/'/g, "&#39;");
    }

    /**
     * Builds the markup of the editable column table: one row per column name
     * with a type selector and a "Null" checkbox (checked by default).
     *
     * The id/class attributes of the select and the checkbox are kept exactly
     * as before because the conversion handlers look the controls up through
     * them. NOTE(review): the per-row ids are therefore still duplicated
     * across rows; switching to classes needs a coordinated change.
     *
     * @param {string[]} columnNames Column names, one table row each.
     * @returns {string} HTML for a complete <table id="tbl"> element.
     */
    function buildColumnTableHtml(columnNames) {
    	var formato = '<select class="form"><option value="string">string</option><option value="int">int</option><option value="date">date</option><option value="datetime">datetime</option><option value="float">float</option><option value="varchar(50)">varchar</option></select>';
    	var null1 = '<input id="null" class="null11" type="checkbox" name="univoco" value="null" checked> <br>';
    	var html = '<table id="tbl" class="table table-sm  table-striped table-centered .thead-dark "><thead class="thead-dark"><tr class="table-primary"><th scope="col">ColumnName</th><th scope="col">Format</th>  <th scope="col">Null</th>   </tr>  </thead> <tbody>';
    	columnNames.forEach(function (name) {
    		// The name comes straight from user input, so it is escaped before
    		// being concatenated into markup.
    		html += '<tr><td id = "val">' + escapeColumnHtml(name) + '</td><td id = "form">' + formato + '</td><td id = "nul">' + null1 + '</td></tr>';
    	});
    	html += '</tbody></table>';
    	return html;
    }

    // "Convert to Table": rebuild the column table from the textarea content.
    $( "#btn1" ).click(function() {
    	// Remove only the previously generated table, not every table on the page.
    	$( "#tbl" ).remove();
    	var names = parseColumnNames(
    		document.getElementById('text').value,
    		document.getElementById('sep').value
    	);
    	$("#table").append(buildColumnTableHtml(names));
    });
    
    /**
     * Maps the value chosen in a row's type selector to an Avro field type.
     *
     * - "date" becomes the Avro logical type date (an annotated int).
     * - "datetime" becomes the logical type timestamp-millis (an annotated
     *   long), because "datetime" itself is not an Avro type.
     * - "varchar(50)" becomes "string", for the same reason.
     * - Any other value is passed through unchanged.
     *
     * When the column is nullable the result is wrapped in a union with
     * "null". This now also applies to date columns, which were previously
     * never made nullable.
     *
     * @param {string}  selectedType Value of the selected <option>.
     * @param {boolean} nullable     Whether the row's Null checkbox is checked.
     * @returns {(string|Object|Array)} Avro type suitable for JSON.stringify.
     */
    function toAvroFieldType(selectedType, nullable) {
    	var avroType;
    	if (selectedType === 'date') {
    		avroType = {type: "int", logicalType: "date"};
    	} else if (selectedType === 'datetime') {
    		avroType = {type: "long", logicalType: "timestamp-millis"};
    	} else if (selectedType === 'varchar(50)') {
    		avroType = 'string';
    	} else {
    		avroType = selectedType;
    	}
    	return nullable ? [avroType, 'null'] : avroType;
    }

    /**
     * Escapes &, < and > so JSON text can be appended as HTML and still be
     * displayed literally.
     *
     * @param {string} text Text to display.
     * @returns {string} HTML-safe text.
     */
    function escapeAvroHtml(text) {
    	return String(text)
    		.replace(/&/g, "&amp;")
    		.replace(/</g, "&lt;")
    		.replace(/>/g, "&gt;");
    }

    /**
     * Renders the Avro record schema as the HTML snippet shown in the
     * "Avro Format" card: a fixed record header, one line per field
     * (comma-prefixed from the second field on) and the closing footer.
     * An empty field list yields an empty "fields" array.
     *
     * @param {Array<{name: string, type: *}>} fields Avro field definitions.
     * @returns {string} HTML ready to be appended to the output container.
     */
    function renderAvroSchemaHtml(fields) {
    	var header = '{ <br> &nbsp &nbsp "type": "record"  ,  <br>   &nbsp &nbsp "namespace": "Mezzora" ,  <br>  &nbsp &nbsp "name": "ReportDSL" ,  <br>   &nbsp &nbsp  "fields": [ <br>';
    	var footer = '&nbsp &nbsp &nbsp &nbsp  ] <br> }';
    	var indent = '&nbsp &nbsp &nbsp &nbsp &nbsp ';
    	var lines = fields.map(function (field, index) {
    		var prefix = index === 0 ? indent : indent + ' ,';
    		return prefix + escapeAvroHtml(JSON.stringify(field)) + "<br>";
    	});
    	return header + lines.join('') + footer;
    }

    // "Convert to Avro": read every table row and print the matching schema.
    $( "#btn2").click(function() {
    	$( "#avro" ).empty();
    	var fields = [];
    	$('#tbl tbody tr').each(function(i, tr){
    		var $row = $(tr);
    		fields.push({
    			// First cell holds the column name.
    			name: $row.find('td').eq(0).text(),
    			type: toAvroFieldType(
    				$row.find('select.form').children("option:selected").eq(0).val(),
    				// The class is used instead of the id because the id repeats in every row.
    				$row.find('input.null11').eq(0).is(':checked')
    			)
    		});
    	});
    	$('#avro').append(renderAvroSchemaHtml(fields));
    });
    
    /**
     * Maps the value chosen in a row's type selector to a SQL column type
     * followed by its nullability clause.
     *
     * "string" is not a SQL type, so it is rendered as varchar(50). This
     * mapping used to sit in a branch that could never run. Every other
     * value is used as-is.
     *
     * @param {string}  selectedType Value of the selected <option>.
     * @param {boolean} nullable     Whether the row's Null checkbox is checked.
     * @returns {string} For example "varchar(50) null" or "int not null".
     */
    function toSqlColumnType(selectedType, nullable) {
    	var sqlType = selectedType === 'string' ? 'varchar(50)' : selectedType;
    	return sqlType + (nullable ? ' null' : ' not null');
    }

    /**
     * Escapes &, < and > so column names and types can be appended as HTML
     * and still be displayed literally.
     *
     * @param {string} text Text to display.
     * @returns {string} HTML-safe text.
     */
    function escapeSqlHtml(text) {
    	return String(text)
    		.replace(/&/g, "&amp;")
    		.replace(/</g, "&lt;")
    		.replace(/>/g, "&gt;");
    }

    /**
     * Renders the CREATE TABLE statement as the HTML snippet shown in the
     * "SQL Create" card. Column definitions are separated by commas; no comma
     * follows the last column.
     *
     * @param {Array<{name: string, type: string}>} columns Column definitions,
     *        where type already includes the nullability clause.
     * @returns {string} HTML ready to be appended to the output container.
     */
    function renderSqlCreateHtml(columns) {
    	var header = 'CREATE TABLE [schema].[tblName] &nbsp ( <br>';
    	var footer = ');';
    	var indent = '&nbsp &nbsp &nbsp &nbsp &nbsp ';
    	var lines = columns.map(function (column) {
    		return indent + escapeSqlHtml(column.name) + ' ' + escapeSqlHtml(column.type);
    	});
    	var body = lines.length > 0 ? lines.join(',<br>') + '<br>' : '';
    	return header + body + footer;
    }

    // "Convert to SQL Create": read every table row and print the statement.
    $( "#btn3").click(function() {
    	$( "#sql" ).empty();
    	var columns = [];
    	$('#tbl tbody tr').each(function(i, tr){
    		var $row = $(tr);
    		columns.push({
    			// First cell holds the column name.
    			name: $row.find('td').eq(0).text(),
    			type: toSqlColumnType(
    				$row.find('select.form').children("option:selected").eq(0).val(),
    				// The class is used instead of the id because the id repeats in every row.
    				$row.find('input.null11').eq(0).is(':checked')
    			)
    		});
    	});
    	$('#sql').append(renderSqlCreateHtml(columns));
    });
    /* Page layout: .parent is a CSS grid with 10 equal-width columns and
       12 equal-height rows, a 20px gap between columns and no gap between rows. */
    .parent {
    	display: grid;
    	grid-template-columns: repeat(10, 1fr);
    	grid-template-rows: repeat(12, 1fr);
    	grid-column-gap: 20px;
    	grid-row-gap: 0px;
    	}
    	
    	/* Placement of each child; grid-area values are
    	   row-start / column-start / row-end / column-end.
    	   NOTE(review): .div3 and .div7 end at column line 12, but 10 explicit
    	   columns only define lines 1-11, so an implicit extra column is
    	   created. Confirm this is intended. */
    	/* .div1: row 1, columns 2-10 (empty in the page markup). */
    	.div1 { grid-area: 1 / 2 / 2 / 11; }
    	/* .div2: row 5, column 6 (empty in the page markup). */
    	.div2 { grid-area: 5 / 6 / 6 / 7; }
    	/* .div3: rows 3-4, full width; holds the textarea and the "Convert to Table" button. */
    	.div3 { grid-area: 3 / 1 / 5 / 12; }
    	/* .div4: row 2, column 1; holds the separator input. */
    	.div4 { grid-area: 2 / 1 / 3 / 2; }
    	/* .div5: rows 6-12, columns 1-3; holds the convert buttons and the generated table. */
    	.div5 { grid-area: 6 / 1 / 13 / 4; }
    	/* .div6: rows 6-12, columns 4-7; the "Avro Format" card. */
    	.div6 { grid-area: 6 / 4 / 13 / 8; }
    	/* .div7: rows 6-12, from column 8 to line 12; the "SQL Create" card. */
    	.div7 { grid-area: 6 / 8 / 13 / 12; }
    	
    	/* Both output cards get a visible border and spacing on their right side. */
    	.div6 , .div7{
    
    		border: 1px solid black;
    		margin-right: 10px;
    	}
    
    	/* Space below the "Convert to Table" button. */
    	#btn1{
    		margin-bottom: 30px;
    	}
    <!DOCTYPE html>
    <html lang="en">
    
    <head>
    	<meta charset="UTF-8">
    	<meta name="viewport" content="width=device-width, initial-scale=1.0">
    	<meta http-equiv="X-UA-Compatible" content="ie=edge">
    	<!-- A non-empty title is required for a valid, accessible document. -->
    	<title>Column list to Avro schema / SQL CREATE TABLE</title>
    	<!-- jQuery is needed by the click handlers; Bootstrap provides the button, table and card styling. -->
      <script  src="https://code.jquery.com/jquery-3.4.1.js"  integrity="sha256-WpOohJOqMqqyKL9FccASB9O0KwACQJpFTUBLTYOVvVU="	crossorigin="anonymous"> </script>
      <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
    </head>
    <body>
    	<h2>Insert the text and the separator and press the button: </h2>
    	<div class="parent">
    		<div class="div1"></div>
    		<div class="div2 btn btn-alert"></div>
    		<!-- Input area: the separated list of column names (#text) and the button that builds the table. -->
    		<div class="div3">
          <textarea name="text" id="text" cols="120" rows="4" aria-label="Column names" placeholder="ColumnName1,ColumnName2,ColumnName3,ColumnName4,............"></textarea>
          <input type="button"	value="Convert to Table" id="btn1" class="btn btn-primary"> 
        </div>
    		<!-- Separator used to split the column list (#sep); defaults to a comma. -->
    		<div class="div4"><label for="sep">Separator</label><input type="text" name="sep" id="sep" value=","> </div>
    		<!-- Conversion buttons and the container (#table) that receives the generated column table. -->
    		<div class="div5">
    			<input type="button" value="Convert to Avro" class="btn btn-success" id="btn2">
    			<input type="button" value="Convert to SQL Create" class="btn btn-info" id="btn3">
    			<div id="table"></div>
    		</div>
    		<!-- Output card filled with the Avro schema (#avro). -->
    		<div class="div6 card">
    			<div class="card-header font-weight-bolder">Avro Format</div>
    			<div class="card-body" id='avro'>
    			</div>
    		</div>
    		<!-- Output card filled with the CREATE TABLE statement (#sql). -->
    		<div class="div7 card">
    			<div class="card-header font-weight-bolder">SQL Create</div>
    			<div class="card-body" id='sql'>
    			</div>
    		</div>
    	</div>
    </body>
    </html>

    0 讨论(0)
  • 2020-12-28 19:05

    https://github.com/apache/avro/blob/trunk/lang/java/mapred The Avro source code does have examples; e.g. TestReflectJob helped me write a map-reduce job using my pre-defined domain objects.

    0 讨论(0)
  • 2020-12-28 19:07

    Other links:

    • JavaDocs are sometimes needed.
    • This InfoQ article may be of some use
    • Avro Serialization

    The main problem I see with the documentation (what little exists) is that it focuses on the very laborious "generic" approach, which seems odd because it combines the worst of both worlds -- you must still provide a full schema for the data, but you get no benefit from static types or the like. The automatic code generation is more convenient, but less well covered.

    0 讨论(0)
提交回复
热议问题